framework,version,device,op_name,kernel_source,batch_size,isl,num_heads,num_key_value_heads,head_dim,beam_width,attn_dtype,kv_cache_dtype,step,latency
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,96,1,128,1,float16,float16,0,47.88651021321615
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,96,1,128,1,fp8,fp8,0,36.861610412597656
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,96,1,128,1,float16,fp8,0,48.52448018391927
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,96,4,128,1,float16,float16,0,48.4840087890625
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,96,4,128,1,float16,fp8,0,48.06946818033854
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,96,4,128,1,fp8,fp8,0,37.05483754475912
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,96,8,128,1,float16,float16,0,48.10905456542969
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,96,8,128,1,float16,fp8,0,48.2440439860026
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,96,8,128,1,fp8,fp8,0,37.1415049235026
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,96,96,128,1,float16,float16,0,24.556203206380207
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,96,1,128,1,float16,float16,0,23.453519185384113
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,96,1,128,1,float16,fp8,0,22.92034149169922
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,96,1,128,1,fp8,fp8,0,18.14698664347331
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,96,96,128,1,float16,fp8,0,24.362660725911457
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,96,96,128,1,fp8,fp8,0,18.791765848795574
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,96,4,128,1,float16,float16,0,23.43237813313802
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,96,4,128,1,float16,fp8,0,23.53442637125651
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,96,4,128,1,fp8,fp8,0,18.2654291788737
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,96,8,128,1,float16,float16,0,23.319737752278645
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,96,8,128,1,float16,fp8,0,23.331326802571613
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,96,8,128,1,fp8,fp8,0,18.205333709716797
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,96,1,128,1,float16,float16,0,11.93130111694336
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,96,96,128,1,float16,float16,0,12.405077616373697
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,96,1,128,1,float16,fp8,0,11.881301879882812
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,96,96,128,1,float16,fp8,0,12.376752217610678
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,96,1,128,1,fp8,fp8,0,9.378986358642578
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,96,96,128,1,fp8,fp8,0,9.737557093302408
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,96,4,128,1,float16,float16,0,11.874645233154297
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,96,4,128,1,float16,fp8,0,12.140026092529297
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,96,4,128,1,fp8,fp8,0,9.390928268432617
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,96,8,128,1,float16,float16,0,12.272655487060547
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,96,8,128,1,float16,fp8,0,12.011525472005209
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,96,8,128,1,fp8,fp8,0,9.390250523885092
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,96,96,128,1,float16,float16,0,6.0776106516520185
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,96,96,128,1,float16,fp8,0,6.252890904744466
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,96,1,128,1,float16,float16,0,6.090752283732097
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,96,1,128,1,fp8,fp8,0,5.032618522644043
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,96,1,128,1,float16,fp8,0,5.864277521769206
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,96,96,128,1,fp8,fp8,0,5.181952158610026
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,96,4,128,1,float16,float16,0,6.167205174763997
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,96,4,128,1,float16,fp8,0,6.011573155721028
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,96,4,128,1,fp8,fp8,0,5.040800094604492
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,96,8,128,1,float16,float16,0,6.13973871866862
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,96,8,128,1,float16,fp8,0,6.133935928344727
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,96,8,128,1,fp8,fp8,0,5.049685478210449
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,96,1,128,1,float16,float16,0,27.892911275227863
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,96,1,128,1,float16,fp8,0,28.08593495686849
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,96,1,128,1,fp8,fp8,0,21.693440755208332
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,96,4,128,1,float16,float16,0,27.65191904703776
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,96,4,128,1,float16,fp8,0,28.215977986653645
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,96,4,128,1,fp8,fp8,0,21.423792521158855
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,96,8,128,1,float16,float16,0,27.716094970703125
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,96,8,128,1,fp8,fp8,0,21.824175516764324
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,96,8,128,1,float16,fp8,0,27.694244384765625
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,96,1,128,1,float16,float16,0,14.10696029663086
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,96,1,128,1,float16,fp8,0,13.861204783121744
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,96,1,128,1,fp8,fp8,0,10.87948226928711
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,96,96,128,1,float16,fp8,0,14.267237345377604
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,96,96,128,1,float16,float16,0,14.525269826253256
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,96,4,128,1,float16,float16,0,13.689029693603516
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,96,4,128,1,float16,fp8,0,13.963605244954428
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,96,4,128,1,fp8,fp8,0,10.906453450520834
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,96,96,128,1,fp8,fp8,0,11.408047993977865
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,96,8,128,1,float16,fp8,0,13.891583760579428
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,96,8,128,1,fp8,fp8,0,10.91976547241211
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,96,8,128,1,float16,float16,0,13.92401123046875
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,96,96,128,1,float16,float16,0,7.11680539449056
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,96,1,128,1,float16,float16,0,6.76643180847168
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,96,96,128,1,float16,fp8,0,7.011850357055664
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,96,1,128,1,float16,fp8,0,6.774965286254883
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,96,1,128,1,fp8,fp8,0,5.705045064290364
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,96,96,128,1,fp8,fp8,0,5.9332319895426435
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,96,4,128,1,float16,float16,0,6.753125508626302
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,96,4,128,1,float16,fp8,0,6.615898768107097
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,96,4,128,1,fp8,fp8,0,5.710154851277669
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,96,8,128,1,float16,float16,0,6.89903450012207
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,96,8,128,1,float16,fp8,0,7.117647806803386
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,96,8,128,1,fp8,fp8,0,5.7128957112630205
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,96,96,128,1,float16,float16,0,3.631114641825358
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,96,96,128,1,float16,fp8,0,3.641296068827311
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,96,1,128,1,float16,float16,0,3.549877484639486
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,96,1,128,1,float16,fp8,0,3.58896541595459
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,96,96,128,1,fp8,fp8,0,3.213237444559733
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,96,1,128,1,fp8,fp8,0,3.111765225728353
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,96,4,128,1,float16,float16,0,3.560271898905436
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,96,4,128,1,float16,fp8,0,3.5727361043294272
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,96,8,128,1,float16,float16,0,3.6372639338175454
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,96,4,128,1,fp8,fp8,0,3.111392021179199
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,96,8,128,1,float16,fp8,0,3.5713812510172525
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,96,8,128,1,fp8,fp8,0,3.1168800989786782
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,96,1,128,1,float16,float16,0,19.905707041422527
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,96,1,128,1,float16,fp8,0,19.301546732584637
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,96,1,128,1,fp8,fp8,0,15.5055783589681
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,96,4,128,1,float16,float16,0,19.554303487141926
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,96,4,128,1,float16,fp8,0,19.894442240397137
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,96,4,128,1,fp8,fp8,0,15.532709757486979
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,96,8,128,1,float16,float16,0,20.202154795328777
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,96,8,128,1,float16,fp8,0,20.143792470296223
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,96,8,128,1,fp8,fp8,0,15.611568450927734
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,96,1,128,1,float16,float16,0,9.860618591308594
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,96,1,128,1,float16,fp8,0,9.936389287312826
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,96,1,128,1,fp8,fp8,0,7.96449597676595
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,96,96,128,1,float16,float16,0,10.258431752522787
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,96,96,128,1,float16,fp8,0,10.322805404663086
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,96,96,128,1,fp8,fp8,0,8.38041623433431
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,96,4,128,1,float16,float16,0,9.913007736206055
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,96,4,128,1,float16,fp8,0,10.053967793782553
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,96,4,128,1,fp8,fp8,0,7.973552068074544
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,96,8,128,1,float16,float16,0,9.851733525594076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,96,8,128,1,float16,fp8,0,9.845418930053711
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,96,8,128,1,fp8,fp8,0,7.981050491333008
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,96,1,128,1,float16,float16,0,5.041674613952637
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,96,1,128,1,float16,fp8,0,5.004474639892578
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,96,96,128,1,float16,float16,0,5.092186609903972
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,96,96,128,1,float16,fp8,0,5.1768372853597
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,96,96,128,1,fp8,fp8,0,4.3846133550008135
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,96,1,128,1,fp8,fp8,0,4.18782393137614
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,96,4,128,1,float16,float16,0,4.99833615620931
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,96,4,128,1,float16,fp8,0,4.8039201100667315
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,96,4,128,1,fp8,fp8,0,4.195679982503255
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,96,8,128,1,float16,float16,0,4.86741320292155
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,96,8,128,1,float16,fp8,0,4.803584098815918
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,96,8,128,1,fp8,fp8,0,4.201818784077962
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,96,96,128,1,float16,fp8,0,2.6982399622599282
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,96,96,128,1,float16,float16,0,2.6606879234313965
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,96,96,128,1,fp8,fp8,0,2.401792049407959
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,96,1,128,1,float16,float16,0,2.5833813349405923
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,96,1,128,1,float16,fp8,0,2.582698663075765
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,96,1,128,1,fp8,fp8,0,2.309120019276937
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,96,4,128,1,float16,float16,0,2.58406400680542
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,96,4,128,1,float16,fp8,0,2.58679469426473
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,96,4,128,1,fp8,fp8,0,2.3159467379252114
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,96,8,128,1,float16,float16,0,2.590208053588867
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,96,8,128,1,float16,fp8,0,2.6313440004984536
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,96,8,128,1,fp8,fp8,0,2.321061293284098
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,96,1,128,1,float16,float16,0,26.03620147705078
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,96,1,128,1,fp8,fp8,0,21.005306243896484
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,96,1,128,1,float16,fp8,0,26.5209223429362
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,96,4,128,1,float16,float16,0,26.957305908203125
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,96,4,128,1,float16,fp8,0,26.722635904947918
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,96,4,128,1,fp8,fp8,0,21.099178314208984
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,96,8,128,1,float16,float16,0,27.151878356933594
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,96,8,128,1,float16,fp8,0,26.655057271321613
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,96,8,128,1,fp8,fp8,0,21.12666193644206
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,96,1,128,1,float16,float16,0,13.443557739257812
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,96,96,128,1,float16,float16,0,13.892250061035156
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,96,1,128,1,fp8,fp8,0,10.630309422810873
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,96,1,128,1,float16,fp8,0,13.125813802083334
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,96,96,128,1,float16,fp8,0,14.286000569661459
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,96,4,128,1,float16,float16,0,13.327018737792969
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,96,4,128,1,float16,fp8,0,13.412181854248047
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,96,4,128,1,fp8,fp8,0,10.63918431599935
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,96,8,128,1,float16,float16,0,13.427541097005209
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,96,8,128,1,float16,fp8,0,13.441029866536459
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,96,8,128,1,fp8,fp8,0,10.668533325195312
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,96,1,128,1,float16,float16,0,6.470832188924153
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,96,96,128,1,float16,float16,0,6.748677571614583
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,96,96,128,1,float16,fp8,0,7.262383778889974
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,96,1,128,1,fp8,fp8,0,5.472085316975911
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,96,1,128,1,float16,fp8,0,6.589935938517253
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,96,96,128,1,fp8,fp8,0,5.826730728149414
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,96,4,128,1,float16,float16,0,6.664021174112956
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,96,4,128,1,float16,fp8,0,6.659936269124349
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,96,4,128,1,fp8,fp8,0,5.498709360758464
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,96,8,128,1,float16,float16,0,6.514197031656901
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,96,8,128,1,fp8,fp8,0,5.515621185302734
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,96,8,128,1,float16,fp8,0,6.737749099731445
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,96,96,128,1,float16,fp8,0,3.47377077738444
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,96,96,128,1,float16,float16,0,3.4502026240030923
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,96,1,128,1,float16,float16,0,3.312650680541992
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,96,1,128,1,float16,fp8,0,3.2698081334431968
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,96,1,128,1,fp8,fp8,0,2.9059359232584634
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,96,96,128,1,fp8,fp8,0,3.0772854487101235
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,96,4,128,1,float16,float16,0,3.320495923360189
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,96,4,128,1,float16,fp8,0,3.3304001490275064
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,96,4,128,1,fp8,fp8,0,2.914133389790853
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,96,8,128,1,float16,float16,0,3.3242454528808594
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,96,8,128,1,float16,fp8,0,3.3095680872599282
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,96,8,128,1,fp8,fp8,0,2.9258880615234375
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,96,96,128,1,float16,float16,0,1.8870612780253093
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,96,1,128,1,float16,float16,0,1.8211839993794758
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,96,96,128,1,float16,fp8,0,1.8897973696390789
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,96,96,128,1,fp8,fp8,0,1.7018826802571614
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,96,1,128,1,float16,fp8,0,1.8256212870279949
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,96,1,128,1,fp8,fp8,0,1.626970609029134
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,96,4,128,1,float16,float16,0,1.8027520179748535
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,96,4,128,1,float16,fp8,0,1.808901309967041
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,96,4,128,1,fp8,fp8,0,1.6303733189900715
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,96,8,128,1,float16,float16,0,1.806831995646159
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,96,8,128,1,float16,fp8,0,1.8136746088663738
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,96,8,128,1,fp8,fp8,0,1.6327999432881672
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,96,1,128,1,float16,fp8,0,16.06161626180013
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,96,1,128,1,float16,float16,0,16.211973826090496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,96,1,128,1,fp8,fp8,0,13.048325856526693
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,96,4,128,1,float16,float16,0,16.48536554972331
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,96,4,128,1,float16,fp8,0,16.175103505452473
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,96,4,128,1,fp8,fp8,0,13.127680460611979
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,96,8,128,1,float16,float16,0,16.301226298014324
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,96,8,128,1,fp8,fp8,0,13.148858388264975
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,96,8,128,1,float16,fp8,0,16.317092895507812
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,96,1,128,1,float16,float16,0,7.795717239379883
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,96,1,128,1,float16,fp8,0,8.023898442586264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,96,96,128,1,float16,fp8,0,8.750944137573242
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,96,1,128,1,fp8,fp8,0,6.641498565673828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,96,96,128,1,float16,float16,0,8.477706909179688
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,96,96,128,1,fp8,fp8,0,7.150911966959636
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,96,4,128,1,float16,float16,0,7.963829040527344
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,96,4,128,1,float16,fp8,0,8.058368047078451
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,96,4,128,1,fp8,fp8,0,6.649344126383464
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,96,8,128,1,float16,float16,0,7.827802658081055
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,96,8,128,1,float16,fp8,0,7.981578826904297
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,96,8,128,1,fp8,fp8,0,6.682111740112305
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,96,96,128,1,float16,float16,0,4.259178797403972
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,96,1,128,1,float16,float16,0,3.9973653157552085
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,96,96,128,1,float16,fp8,0,4.321119944254558
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,96,96,128,1,fp8,fp8,0,3.7039785385131836
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,96,1,128,1,float16,fp8,0,3.8814827601114907
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,96,1,128,1,fp8,fp8,0,3.4505386352539062
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,96,4,128,1,float16,fp8,0,3.9661280314127603
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,96,4,128,1,float16,float16,0,3.954533259073893
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,96,4,128,1,fp8,fp8,0,3.457365353902181
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,96,8,128,1,float16,float16,0,3.8595946629842124
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,96,8,128,1,float16,fp8,0,3.9545227686564126
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,96,8,128,1,fp8,fp8,0,3.4710187911987305
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,96,96,128,1,float16,float16,0,2.172250588734945
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,96,96,128,1,float16,fp8,0,2.1886293093363443
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,96,96,128,1,fp8,fp8,0,1.9770026206970215
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,96,1,128,1,float16,float16,0,2.064725399017334
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,96,1,128,1,float16,fp8,0,2.0560213724772134
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,96,1,128,1,fp8,fp8,0,1.856170654296875
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,96,4,128,1,float16,float16,0,2.044245402018229
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,96,4,128,1,float16,fp8,0,2.0483412742614746
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,96,4,128,1,fp8,fp8,0,1.8588959376017253
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,96,8,128,1,float16,float16,0,2.0524373054504395
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,96,8,128,1,float16,fp8,0,2.054485321044922
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,96,8,128,1,fp8,fp8,0,1.865045388539632
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,96,96,128,1,float16,float16,0,1.1946667035420735
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,96,96,128,1,float16,fp8,0,1.2124213377634685
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,96,96,128,1,fp8,fp8,0,1.1146186987559001
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,96,1,128,1,float16,fp8,0,1.1539093653361003
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,96,1,128,1,fp8,fp8,0,1.0597013632456462
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,96,1,128,1,float16,float16,0,1.149781306584676
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,96,4,128,1,float16,float16,0,1.163434664408366
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,96,4,128,1,float16,fp8,0,1.1682133674621582
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,96,4,128,1,fp8,fp8,0,1.065130631128947
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,96,8,128,1,float16,float16,0,1.167866627375285
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,96,8,128,1,fp8,fp8,0,1.067855993906657
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,96,8,128,1,float16,fp8,0,1.1596852938334148
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,96,1,128,1,float16,fp8,0,16.833882649739582
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,96,1,128,1,float16,float16,0,16.74717839558919
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,96,1,128,1,fp8,fp8,0,14.001487731933594
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,96,4,128,1,float16,float16,0,16.65945561726888
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,96,4,128,1,float16,fp8,0,16.410111745198567
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,96,4,128,1,fp8,fp8,0,14.010725657145182
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,96,8,128,1,float16,float16,0,16.58282725016276
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,96,8,128,1,float16,fp8,0,16.545108795166016
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,96,8,128,1,fp8,fp8,0,14.098431905110678
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,96,1,128,1,float16,float16,0,7.989760080973308
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,96,1,128,1,float16,fp8,0,8.10854403177897
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,96,96,128,1,float16,float16,0,8.73625628153483
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,96,1,128,1,fp8,fp8,0,7.034533182779948
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,96,96,128,1,float16,fp8,0,8.782661437988281
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,96,4,128,1,float16,fp8,0,8.12119992574056
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,96,4,128,1,float16,float16,0,7.936869303385417
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,96,4,128,1,fp8,fp8,0,7.079935709635417
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,96,8,128,1,float16,float16,0,8.053936004638672
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,96,8,128,1,float16,fp8,0,8.261632283528646
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,96,8,128,1,fp8,fp8,0,7.054677327473958
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,96,96,128,1,float16,float16,0,4.424549420674642
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,96,1,128,1,float16,float16,0,3.9169705708821616
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,96,1,128,1,fp8,fp8,0,3.5938987731933594
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,96,1,128,1,float16,fp8,0,4.023978551228841
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,96,96,128,1,float16,fp8,0,4.354053179423015
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,96,96,128,1,fp8,fp8,0,3.947866757710775
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,96,4,128,1,float16,fp8,0,3.9669812520345054
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,96,4,128,1,float16,float16,0,4.004895846048991
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,96,4,128,1,fp8,fp8,0,3.617114702860514
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,96,8,128,1,float16,float16,0,4.068677266438802
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,96,8,128,1,float16,fp8,0,4.011013348897298
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,96,8,128,1,fp8,fp8,0,3.627354621887207
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,96,96,128,1,float16,float16,0,2.1831679344177246
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,96,96,128,1,float16,fp8,0,2.232485294342041
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,96,1,128,1,float16,float16,0,2.0408266385396323
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,96,1,128,1,fp8,fp8,0,1.893887996673584
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,96,1,128,1,float16,fp8,0,2.0432373682657876
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,96,96,128,1,fp8,fp8,0,2.0551679929097495
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,96,4,128,1,float16,float16,0,2.0456053415934243
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,96,4,128,1,float16,fp8,0,2.041541258494059
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,96,4,128,1,fp8,fp8,0,1.8962720235188801
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,96,8,128,1,float16,float16,0,2.0473225911458335
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,96,8,128,1,float16,fp8,0,2.0606346130371094
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,96,8,128,1,fp8,fp8,0,1.8996906280517578
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,96,96,128,1,float16,float16,0,1.1729706923166912
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,96,96,128,1,float16,fp8,0,1.1946667035420735
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,96,1,128,1,float16,float16,0,1.1112106641133626
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,96,1,128,1,float16,fp8,0,1.1163252989451091
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,96,96,128,1,fp8,fp8,0,1.1091626485188801
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,96,1,128,1,fp8,fp8,0,1.035434643427531
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,96,4,128,1,float16,float16,0,1.1122506459554036
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,96,4,128,1,float16,fp8,0,1.1146453221638997
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,96,4,128,1,fp8,fp8,0,1.037823994954427
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,96,8,128,1,float16,float16,0,1.1153120199839275
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,96,8,128,1,float16,fp8,0,1.1187307039896648
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,96,8,128,1,fp8,fp8,0,1.0402133464813232
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,96,96,128,1,float16,float16,0,0.6707200209299723
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,96,96,128,1,fp8,fp8,0,0.644266684850057
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,96,1,128,1,float16,float16,0,0.6473546822865804
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,96,96,128,1,float16,fp8,0,0.6826666990915934
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,96,1,128,1,float16,fp8,0,0.6500693162282308
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,96,1,128,1,fp8,fp8,0,0.6041599909464518
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,96,4,128,1,float16,float16,0,0.6490399837493896
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,96,4,128,1,float16,fp8,0,0.6514399846394857
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,96,4,128,1,fp8,fp8,0,0.6075733502705892
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,96,8,128,1,float16,fp8,0,0.6546826759974161
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,96,8,128,1,float16,float16,0,0.6487040122350057
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,96,8,128,1,fp8,fp8,0,0.6110026836395264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,96,1,128,1,float16,float16,0,9.986053466796875
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,96,1,128,1,float16,fp8,0,10.291029612223307
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,96,1,128,1,fp8,fp8,0,9.338703791300455
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,96,4,128,1,float16,float16,0,10.293247858683268
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,96,4,128,1,float16,fp8,0,10.416821161905924
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,96,4,128,1,fp8,fp8,0,9.329120000203451
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,96,8,128,1,float16,float16,0,10.316650390625
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,96,8,128,1,float16,fp8,0,10.538666407267252
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,96,8,128,1,fp8,fp8,0,9.37557856241862
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,96,1,128,1,float16,float16,0,5.035690625508626
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,96,1,128,1,float16,fp8,0,4.98090140024821
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,96,1,128,1,fp8,fp8,0,4.688725471496582
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,96,96,128,1,float16,fp8,0,5.702661514282227
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,96,96,128,1,float16,float16,0,5.419375737508138
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,96,4,128,1,float16,float16,0,5.103103955586751
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,96,96,128,1,fp8,fp8,0,5.202773412068685
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,96,4,128,1,float16,fp8,0,4.982613245646159
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,96,4,128,1,fp8,fp8,0,4.699306805928548
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,96,8,128,1,float16,float16,0,5.076143900553386
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,96,8,128,1,float16,fp8,0,5.007189432779948
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,96,8,128,1,fp8,fp8,0,4.75
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,96,1,128,1,float16,float16,0,2.5677013397216797
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,96,1,128,1,float16,fp8,0,2.5751892725626626
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,96,96,128,1,float16,float16,0,2.7610559463500977
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,96,96,128,1,float16,fp8,0,2.8078133265177407
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,96,1,128,1,fp8,fp8,0,2.419882615407308
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,96,96,128,1,fp8,fp8,0,2.6644479433695474
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,96,4,128,1,float16,float16,0,2.5700693130493164
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,96,4,128,1,float16,fp8,0,2.5670080184936523
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,96,4,128,1,fp8,fp8,0,2.4236319859822593
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,96,8,128,1,float16,float16,0,2.574848016103109
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,96,8,128,1,fp8,fp8,0,2.437631924947103
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,96,8,128,1,float16,fp8,0,2.612394650777181
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,96,96,128,1,float16,float16,0,1.446741263071696
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,96,1,128,1,float16,float16,0,1.3540639877319336
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,96,96,128,1,float16,fp8,0,1.4704640706380208
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,96,96,128,1,fp8,fp8,0,1.4120960235595703
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,96,1,128,1,fp8,fp8,0,1.2750720183054607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,96,1,128,1,float16,fp8,0,1.3595520655314128
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,96,4,128,1,float16,float16,0,1.3605546951293945
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,96,4,128,1,float16,fp8,0,1.3615786234537761
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,96,8,128,1,float16,float16,0,1.3666987419128418
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,96,4,128,1,fp8,fp8,0,1.2842666308085124
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,96,8,128,1,float16,fp8,0,1.3701066970825195
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,96,8,128,1,fp8,fp8,0,1.287338654200236
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,96,96,128,1,float16,float16,0,0.7923999627431234
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,96,96,128,1,float16,fp8,0,0.8060479958852133
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,96,96,128,1,fp8,fp8,0,0.7666400273640951
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,96,1,128,1,float16,float16,0,0.7522826989491781
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,96,1,128,1,float16,fp8,0,0.7502506573994955
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,96,1,128,1,fp8,fp8,0,0.7079253196716309
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,96,4,128,1,float16,float16,0,0.7492213249206543
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,96,4,128,1,float16,fp8,0,0.7516160011291504
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,96,4,128,1,fp8,fp8,0,0.7072479724884033
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,96,8,128,1,float16,float16,0,0.7519573370615641
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,96,8,128,1,float16,fp8,0,0.7557120323181152
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,96,96,128,1,float16,float16,0,0.46302398045857746
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,96,8,128,1,fp8,fp8,0,0.710314671198527
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,96,96,128,1,float16,fp8,0,0.47121067841847736
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,96,96,128,1,fp8,fp8,0,0.4476586580276489
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,96,1,128,1,float16,float16,0,0.44153066476186115
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,96,1,128,1,fp8,fp8,0,0.4220586617787679
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,96,1,128,1,float16,fp8,0,0.44257601102193195
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,96,4,128,1,float16,float16,0,0.4459520181020101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,96,4,128,1,float16,fp8,0,0.4479999939600627
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,96,4,128,1,fp8,fp8,0,0.4241120020548503
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,96,8,128,1,float16,float16,0,0.4490240017573039
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,96,8,128,1,float16,fp8,0,0.45413867632548016
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,96,8,128,1,fp8,fp8,0,0.4254986842473348
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,96,1,128,1,float16,float16,0,11.617616017659506
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,96,1,128,1,float16,fp8,0,11.413333892822266
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,96,1,128,1,fp8,fp8,0,11.094698588053385
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,96,4,128,1,float16,float16,0,11.535530090332031
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,96,4,128,1,float16,fp8,0,11.431424458821615
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,96,4,128,1,fp8,fp8,0,11.097770690917969
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,96,8,128,1,float16,float16,0,11.52392578125
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,96,8,128,1,float16,fp8,0,11.615397135416666
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,96,8,128,1,fp8,fp8,0,11.165690104166666
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,96,1,128,1,float16,float16,0,5.720064163208008
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,96,1,128,1,fp8,fp8,0,5.538474400838216
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,96,1,128,1,float16,fp8,0,5.67569096883138
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,96,96,128,1,float16,fp8,0,6.297605514526367
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,96,96,128,1,float16,float16,0,6.283946355183919
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,96,4,128,1,float16,float16,0,5.693440119425456
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,96,4,128,1,float16,fp8,0,5.661018371582031
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,96,4,128,1,fp8,fp8,0,5.53335444132487
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,96,8,128,1,float16,float16,0,5.67466672261556
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,96,8,128,1,float16,fp8,0,5.724165598551433
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,96,8,128,1,fp8,fp8,0,5.569541295369466
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,96,1,128,1,float16,float16,0,2.881194750467936
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,96,96,128,1,float16,float16,0,3.1312214533487954
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,96,96,128,1,float16,fp8,0,3.161600112915039
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,96,1,128,1,float16,fp8,0,2.880517323811849
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,96,1,128,1,fp8,fp8,0,2.8037118911743164
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,96,96,128,1,fp8,fp8,0,3.1414772669474282
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,96,4,128,1,float16,float16,0,2.8919413884480796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,96,4,128,1,float16,fp8,0,2.903578758239746
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,96,4,128,1,fp8,fp8,0,2.822143872578939
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,96,8,128,1,float16,float16,0,2.9151573181152344
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,96,8,128,1,float16,fp8,0,2.9395627975463867
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,96,8,128,1,fp8,fp8,0,2.8337440490722656
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,96,1,128,1,float16,float16,0,1.4987893104553223
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,96,96,128,1,float16,float16,0,1.6262826919555664
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,96,1,128,1,fp8,fp8,0,1.4603947003682454
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,96,1,128,1,float16,fp8,0,1.4988212585449219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,96,96,128,1,float16,fp8,0,1.6610986391703289
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,96,96,128,1,fp8,fp8,0,1.6191147168477376
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,96,4,128,1,float16,float16,0,1.4998186429341633
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,96,4,128,1,float16,fp8,0,1.496389389038086
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,96,4,128,1,fp8,fp8,0,1.4593706130981445
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,96,8,128,1,float16,fp8,0,1.5076746940612793
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,96,8,128,1,float16,float16,0,1.499824047088623
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,96,8,128,1,fp8,fp8,0,1.4675679206848145
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,96,96,128,1,float16,float16,0,0.8526240189870199
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,96,96,128,1,float16,fp8,0,0.8731146653493246
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,96,1,128,1,float16,float16,0,0.7913813591003418
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,96,1,128,1,float16,fp8,0,0.7965013186136881
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,96,96,128,1,fp8,fp8,0,0.8533386389414469
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,96,1,128,1,fp8,fp8,0,0.7693653106689453
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,96,4,128,1,float16,float16,0,0.7975200017293295
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,96,4,128,1,float16,fp8,0,0.7971839904785156
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,96,4,128,1,fp8,fp8,0,0.7734613418579102
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,96,8,128,1,float16,float16,0,0.7995733420054117
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,96,8,128,1,float16,fp8,0,0.8026506900787354
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,96,8,128,1,fp8,fp8,0,0.7768747011820475
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,96,96,128,1,float16,float16,0,0.4742826620737712
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,96,96,128,1,float16,fp8,0,0.48079999287923175
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,96,96,128,1,fp8,fp8,0,0.4705280065536499
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,96,1,128,1,float16,fp8,0,0.4452693462371826
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,96,1,128,1,float16,float16,0,0.44354132811228436
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,96,1,128,1,fp8,fp8,0,0.43263999621073407
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,96,4,128,1,float16,float16,0,0.44492268562316895
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,96,4,128,1,float16,fp8,0,0.44629331429799396
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,96,4,128,1,fp8,fp8,0,0.43434667587280273
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,96,8,128,1,float16,float16,0,0.4476586580276489
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,96,8,128,1,float16,fp8,0,0.44970134894053143
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,96,8,128,1,fp8,fp8,0,0.4363946517308553
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,96,96,128,1,float16,float16,0,0.28484266996383667
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,96,96,128,1,fp8,fp8,0,0.28177066644032794
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,96,96,128,1,float16,fp8,0,0.2916693290074666
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,96,1,128,1,float16,float16,0,0.2653813362121582
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,96,1,128,1,float16,fp8,0,0.267792006333669
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,96,1,128,1,fp8,fp8,0,0.26231465737024945
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,96,4,128,1,float16,float16,0,0.26573334137598675
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,96,4,128,1,float16,fp8,0,0.2667466600735982
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,96,4,128,1,fp8,fp8,0,0.26231465737024945
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,96,8,128,1,float16,float16,0,0.26812267303466797
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,96,8,128,1,float16,fp8,0,0.27084799607594806
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,96,8,128,1,fp8,fp8,0,0.2640213370323181
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,96,1,128,1,float16,float16,0,7.799466451009114
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,96,1,128,1,float16,fp8,0,7.783765157063802
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,96,1,128,1,fp8,fp8,0,7.8131256103515625
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,96,4,128,1,float16,float16,0,7.826090494791667
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,96,4,128,1,float16,fp8,0,7.853392283121745
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,96,4,128,1,fp8,fp8,0,7.907157262166341
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,96,8,128,1,float16,float16,0,7.860911687215169
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,96,8,128,1,float16,fp8,0,7.898623784383138
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,96,8,128,1,fp8,fp8,0,7.909546534220378
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,96,1,128,1,float16,float16,0,3.909461339314779
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,96,1,128,1,float16,fp8,0,3.9265387852986655
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,96,96,128,1,float16,float16,0,4.306607882181804
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,96,96,128,1,float16,fp8,0,4.396202723185222
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,96,1,128,1,fp8,fp8,0,3.923797289530436
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,96,4,128,1,float16,float16,0,3.9369386037190757
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,96,96,128,1,fp8,fp8,0,4.393472035725911
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,96,4,128,1,fp8,fp8,0,3.9621973037719727
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,96,4,128,1,float16,fp8,0,3.924816131591797
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,96,8,128,1,float16,float16,0,3.9642454783121743
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,96,8,128,1,float16,fp8,0,3.9645973841349282
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,96,8,128,1,fp8,fp8,0,3.956416130065918
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,96,96,128,1,float16,float16,0,2.218005339304606
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,96,1,128,1,float16,float16,0,2.0094292958577475
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,96,1,128,1,float16,fp8,0,2.0012426376342773
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,96,96,128,1,float16,fp8,0,2.220037301381429
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,96,96,128,1,fp8,fp8,0,2.2416799863179526
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,96,1,128,1,fp8,fp8,0,2.0101119677225747
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,96,4,128,1,float16,float16,0,2.0176159540812173
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,96,4,128,1,float16,fp8,0,2.035711924235026
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,96,4,128,1,fp8,fp8,0,2.0077226956685386
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,96,8,128,1,float16,float16,0,2.0404906272888184
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,96,8,128,1,float16,fp8,0,2.027514616648356
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,96,8,128,1,fp8,fp8,0,2.0326453844706216
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,96,96,128,1,float16,fp8,0,1.1610399881998699
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,96,96,128,1,float16,float16,0,1.13101331392924
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,96,1,128,1,float16,float16,0,1.0480639934539795
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,96,96,128,1,fp8,fp8,0,1.171621322631836
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,96,1,128,1,float16,fp8,0,1.0415786902109783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,96,1,128,1,fp8,fp8,0,1.0415786902109783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,96,4,128,1,float16,float16,0,1.048751990000407
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,96,4,128,1,float16,fp8,0,1.0514720280965169
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,96,4,128,1,fp8,fp8,0,1.0439626375834148
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,96,8,128,1,float16,fp8,0,1.0593279997507732
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,96,8,128,1,float16,float16,0,1.0465280214945476
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,96,8,128,1,fp8,fp8,0,1.051151990890503
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,96,96,128,1,float16,float16,0,0.6000426610310873
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,96,96,128,1,float16,fp8,0,0.6191733280817667
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,96,1,128,1,float16,float16,0,0.5638826688130697
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,96,96,128,1,fp8,fp8,0,0.6161119937896729
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,96,1,128,1,fp8,fp8,0,0.5580746730168661
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,96,1,128,1,float16,fp8,0,0.5594506661097208
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,96,4,128,1,float16,float16,0,0.559440016746521
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,96,4,128,1,float16,fp8,0,0.5604693492253622
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,96,4,128,1,fp8,fp8,0,0.5573920011520386
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,96,8,128,1,float16,float16,0,0.5625173250834147
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,96,8,128,1,float16,fp8,0,0.5652480125427246
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,96,8,128,1,fp8,fp8,0,0.5601119995117188
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,96,96,128,1,float16,float16,0,0.3418399890263875
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,96,96,128,1,fp8,fp8,0,0.3449173370997111
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,96,96,128,1,float16,fp8,0,0.34594134489695233
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,96,1,128,1,float16,float16,0,0.3131733338038127
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,96,1,128,1,float16,fp8,0,0.31385600566864014
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,96,1,128,1,fp8,fp8,0,0.31590932607650757
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,96,4,128,1,float16,float16,0,0.31487999359766644
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,96,4,128,1,float16,fp8,0,0.31726932525634766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,96,4,128,1,fp8,fp8,0,0.31828800837198895
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,96,8,128,1,float16,float16,0,0.3193173408508301
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,96,8,128,1,float16,fp8,0,0.32102400064468384
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,96,8,128,1,fp8,fp8,0,0.3193173408508301
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,96,96,128,1,float16,float16,0,0.20922134319941202
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,96,96,128,1,float16,fp8,0,0.2136746644973755
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,96,96,128,1,fp8,fp8,0,0.21128533283869425
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,96,1,128,1,float16,float16,0,0.19285333156585693
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,96,1,128,1,float16,fp8,0,0.1967786749204
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,96,1,128,1,fp8,fp8,0,0.1909760038057963
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,96,4,128,1,float16,float16,0,0.19267733891805014
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,96,4,128,1,float16,fp8,0,0.19438934326171875
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,96,4,128,1,fp8,fp8,0,0.19029333194096884
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,96,8,128,1,float16,float16,0,0.19370667139689127
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,96,8,128,1,float16,fp8,0,0.19643733898798624
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,96,8,128,1,fp8,fp8,0,0.1930239995320638
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,96,1,128,1,float16,float16,0,8.986453374226889
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,96,1,128,1,float16,fp8,0,8.861530939737955
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,96,1,128,1,fp8,fp8,0,9.506645202636719
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,96,4,128,1,float16,float16,0,9.191509246826172
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,96,4,128,1,float16,fp8,0,9.093114852905273
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,96,4,128,1,fp8,fp8,0,9.538735707600912
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,96,8,128,1,float16,float16,0,9.205941518147787
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,96,8,128,1,fp8,fp8,0,9.714335759480795
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,96,8,128,1,float16,fp8,0,9.152490615844727
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,96,1,128,1,float16,float16,0,4.351146697998047
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,96,1,128,1,float16,fp8,0,4.342613220214844
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,96,1,128,1,fp8,fp8,0,4.582570711771647
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,96,96,128,1,float16,float16,0,5.061637242635091
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,96,96,128,1,float16,fp8,0,4.943888028462728
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,96,4,128,1,float16,float16,0,4.500650723775228
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,96,4,128,1,float16,fp8,0,4.568565368652344
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,96,4,128,1,fp8,fp8,0,4.782416025797526
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,96,8,128,1,float16,float16,0,4.496896107991536
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,96,8,128,1,float16,fp8,0,4.456960042317708
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,96,8,128,1,fp8,fp8,0,4.8595625559488935
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,96,96,128,1,float16,float16,0,2.536959966023763
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,96,1,128,1,float16,float16,0,2.217306613922119
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,96,96,128,1,float16,fp8,0,2.477567990620931
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,96,1,128,1,float16,fp8,0,2.1879626909891763
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,96,1,128,1,fp8,fp8,0,2.292234738667806
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,96,96,128,1,fp8,fp8,0,2.593616008758545
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,96,4,128,1,float16,float16,0,2.2323253949483237
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,96,4,128,1,float16,fp8,0,2.196842670440674
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,96,4,128,1,fp8,fp8,0,2.3782347043355307
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,96,8,128,1,float16,float16,0,2.2118399937947593
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,96,8,128,1,float16,fp8,0,2.2135465939839682
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,96,8,128,1,fp8,fp8,0,2.3586133321126304
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,96,96,128,1,float16,float16,0,1.2444960276285808
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,96,1,128,1,float16,float16,0,1.1050666968027751
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,96,96,128,1,float16,fp8,0,1.2223093509674072
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,96,1,128,1,float16,fp8,0,1.1057493686676025
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,96,96,128,1,fp8,fp8,0,1.3114079634348552
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,96,1,128,1,fp8,fp8,0,1.1491040388743083
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,96,4,128,1,float16,float16,0,1.113594690958659
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,96,4,128,1,float16,fp8,0,1.1153066953023274
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,96,4,128,1,fp8,fp8,0,1.1603573163350422
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,96,8,128,1,float16,float16,0,1.1200799942016602
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,96,8,128,1,float16,fp8,0,1.1180373032887776
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,96,8,128,1,fp8,fp8,0,1.172650655110677
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,96,96,128,1,float16,float16,0,0.6377813418706259
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,96,1,128,1,float16,float16,0,0.5672906637191772
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,96,96,128,1,float16,fp8,0,0.6201973358790079
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,96,96,128,1,fp8,fp8,0,0.6669653256734213
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,96,1,128,1,float16,fp8,0,0.5669493277867635
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,96,1,128,1,fp8,fp8,0,0.582693338394165
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,96,4,128,1,float16,float16,0,0.5676373243331909
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,96,4,128,1,float16,fp8,0,0.5686613321304321
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,96,4,128,1,fp8,fp8,0,0.5911893447240194
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,96,8,128,1,float16,float16,0,0.571071982383728
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,96,8,128,1,float16,fp8,0,0.5724106629689535
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,96,8,128,1,fp8,fp8,0,0.5932373205820719
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,96,96,128,1,float16,float16,0,0.33501867453257245
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,96,1,128,1,float16,float16,0,0.295418659845988
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,96,96,128,1,float16,fp8,0,0.3223946690559387
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,96,1,128,1,float16,fp8,0,0.2950826684633891
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,96,1,128,1,fp8,fp8,0,0.30430400371551514
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,96,96,128,1,fp8,fp8,0,0.34457600116729736
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,96,4,128,1,float16,float16,0,0.2967840035756429
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,96,4,128,1,float16,fp8,0,0.29678932825724286
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,96,4,128,1,fp8,fp8,0,0.30668799082438153
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,96,8,128,1,float16,float16,0,0.30053865909576416
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,96,8,128,1,float16,fp8,0,0.2995199958483378
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,96,8,128,1,fp8,fp8,0,0.3104426662127177
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,96,96,128,1,float16,float16,0,0.18107734123865762
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,96,96,128,1,fp8,fp8,0,0.188591996828715
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,96,96,128,1,float16,fp8,0,0.180074671904246
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,96,1,128,1,float16,float16,0,0.15786666671435037
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,96,1,128,1,float16,fp8,0,0.15718400478363037
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,96,1,128,1,fp8,fp8,0,0.16383467117945352
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,96,4,128,1,float16,float16,0,0.15940800309181213
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,96,4,128,1,float16,fp8,0,0.1604320009549459
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,96,4,128,1,fp8,fp8,0,0.16572266817092896
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,96,8,128,1,float16,float16,0,0.16196800271670023
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,96,8,128,1,float16,fp8,0,0.1616159975528717
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,96,8,128,1,fp8,fp8,0,0.16861865917841592
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,96,96,128,1,float16,float16,0,0.10376532872517903
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,96,96,128,1,float16,fp8,0,0.10068800052007039
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,96,96,128,1,fp8,fp8,0,0.10786133011182149
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,96,1,128,1,float16,fp8,0,0.09284266829490662
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,96,1,128,1,fp8,fp8,0,0.09011200070381165
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,96,1,128,1,float16,float16,0,0.09076266487439473
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,96,4,128,1,float16,float16,0,0.09011200070381165
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,96,4,128,1,float16,fp8,0,0.09046399593353271
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,96,4,128,1,fp8,fp8,0,0.0897706647713979
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,96,8,128,1,float16,float16,0,0.09113599856694539
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,96,8,128,1,float16,fp8,0,0.09248532851537068
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,96,8,128,1,fp8,fp8,0,0.09147733449935913
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,96,1,128,1,float16,float16,0,7.850154876708984
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,96,1,128,1,float16,fp8,0,7.914666493733724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,96,1,128,1,fp8,fp8,0,8.515573501586914
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,96,4,128,1,float16,float16,0,8.034341176350912
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,96,4,128,1,float16,fp8,0,8.083285649617514
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,96,4,128,1,fp8,fp8,0,8.536895751953125
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,96,8,128,1,float16,float16,0,8.10377057393392
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,96,8,128,1,float16,fp8,0,8.116394678751627
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,96,8,128,1,fp8,fp8,0,8.655871709187826
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,96,1,128,1,float16,float16,0,3.8241281509399414
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,96,96,128,1,float16,float16,0,4.551173210144043
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,96,1,128,1,float16,fp8,0,3.820373217264811
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,96,96,128,1,float16,fp8,0,4.412927945454915
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,96,1,128,1,fp8,fp8,0,4.1516373952229815
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,96,4,128,1,float16,fp8,0,4.000090599060059
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,96,4,128,1,float16,float16,0,3.984037399291992
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,96,4,128,1,fp8,fp8,0,4.263253211975098
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,96,8,128,1,float16,float16,0,4.0035092035929365
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,96,8,128,1,float16,fp8,0,4.047701199849446
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,96,8,128,1,fp8,fp8,0,4.327088038126628
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,96,96,128,1,float16,float16,0,2.248191992441813
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,96,1,128,1,float16,float16,0,1.9198293685913086
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,96,96,128,1,float16,fp8,0,2.203989346822103
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,96,1,128,1,float16,fp8,0,1.9160745938618977
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,96,96,128,1,fp8,fp8,0,2.359648068745931
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,96,1,128,1,fp8,fp8,0,2.035685380299886
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,96,4,128,1,float16,float16,0,1.9365439414978027
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,96,4,128,1,float16,fp8,0,1.93178129196167
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,96,4,128,1,fp8,fp8,0,2.1362346013387046
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,96,8,128,1,float16,float16,0,1.94269863764445
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,96,8,128,1,float16,fp8,0,1.9369014104207356
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,96,8,128,1,fp8,fp8,0,2.102954705556234
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,96,96,128,1,float16,float16,0,1.1033600171407063
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,96,1,128,1,float16,float16,0,0.9736533164978027
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,96,96,128,1,float16,fp8,0,1.0825386842091878
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,96,1,128,1,fp8,fp8,0,1.0248106320699055
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,96,96,128,1,fp8,fp8,0,1.179477294286092
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,96,1,128,1,float16,fp8,0,0.974677324295044
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,96,4,128,1,float16,float16,0,0.9815093676249186
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,96,4,128,1,float16,fp8,0,0.9815093676249186
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,96,8,128,1,float16,float16,0,0.9883360068003336
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,96,4,128,1,fp8,fp8,0,1.0320266882578533
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,96,8,128,1,float16,fp8,0,0.9886720180511475
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,96,8,128,1,fp8,fp8,0,1.0443092981974285
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,96,96,128,1,float16,float16,0,0.5693120161692301
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,96,96,128,1,float16,fp8,0,0.5498666763305664
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,96,1,128,1,float16,float16,0,0.4995306730270386
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,96,96,128,1,fp8,fp8,0,0.5997493267059326
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,96,1,128,1,float16,fp8,0,0.49817601839701336
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,96,1,128,1,fp8,fp8,0,0.5239413181940714
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,96,4,128,1,float16,float16,0,0.4978400071461995
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,96,4,128,1,float16,fp8,0,0.49748798211415607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,96,4,128,1,fp8,fp8,0,0.5259946584701538
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,96,8,128,1,float16,float16,0,0.5002239942550659
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,96,8,128,1,fp8,fp8,0,0.532480001449585
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,96,8,128,1,float16,fp8,0,0.5016053517659506
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,96,96,128,1,float16,float16,0,0.2954346736272176
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,96,96,128,1,float16,fp8,0,0.28757333755493164
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,96,1,128,1,float16,float16,0,0.25941334168116253
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,96,1,128,1,float16,fp8,0,0.2602720061937968
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,96,96,128,1,fp8,fp8,0,0.31249066193898517
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,96,1,128,1,fp8,fp8,0,0.2739253242810567
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,96,4,128,1,float16,float16,0,0.2597493330637614
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,96,4,128,1,float16,fp8,0,0.2597493330637614
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,96,4,128,1,fp8,fp8,0,0.27426133553187054
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,96,8,128,1,float16,float16,0,0.2616320053736369
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,96,8,128,1,float16,fp8,0,0.26129066944122314
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,96,8,128,1,fp8,fp8,0,0.27800534168879193
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,96,96,128,1,float16,float16,0,0.1604266663392385
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,96,96,128,1,fp8,fp8,0,0.17017066478729248
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,96,96,128,1,float16,fp8,0,0.15703999996185303
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,96,1,128,1,float16,float16,0,0.13874133427937826
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,96,1,128,1,float16,fp8,0,0.13875200351079306
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,96,1,128,1,fp8,fp8,0,0.14729066689809164
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,96,4,128,1,float16,float16,0,0.1397546629110972
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,96,4,128,1,float16,fp8,0,0.14080533385276794
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,96,4,128,1,fp8,fp8,0,0.14865066607793173
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,96,8,128,1,float16,float16,0,0.1430186629295349
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,96,8,128,1,float16,fp8,0,0.14235732952753702
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,96,8,128,1,fp8,fp8,0,0.15154666701952615
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,96,96,128,1,float16,float16,0,0.08945066730181377
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,96,96,128,1,float16,fp8,0,0.0890933374563853
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,96,96,128,1,fp8,fp8,0,0.09830400347709656
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,96,1,128,1,float16,float16,0,0.0802346666653951
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,96,1,128,1,float16,fp8,0,0.07918400069077809
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,96,1,128,1,fp8,fp8,0,0.08226666847864787
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,96,4,128,1,float16,float16,0,0.07884799937407176
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,96,4,128,1,float16,fp8,0,0.07884799937407176
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,96,4,128,1,fp8,fp8,0,0.08089600006739299
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,96,8,128,1,float16,float16,0,0.08055466910203297
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,96,8,128,1,float16,fp8,0,0.08020799855391185
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,96,8,128,1,fp8,fp8,0,0.08193066716194153
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,96,96,128,1,float16,float16,0,0.052906667192777
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,96,96,128,1,float16,fp8,0,0.052906667192777
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,96,96,128,1,fp8,fp8,0,0.054272000988324486
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,96,1,128,1,float16,float16,0,0.05052266518274943
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,96,1,128,1,float16,fp8,0,0.04948799808820089
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,96,1,128,1,fp8,fp8,0,0.04913066824277242
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,96,4,128,1,float16,float16,0,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,96,4,128,1,float16,fp8,0,0.04948266843954722
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,96,4,128,1,fp8,fp8,0,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,96,8,128,1,float16,float16,0,0.05087999999523163
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,96,8,128,1,float16,fp8,0,0.05120000243186951
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,96,8,128,1,fp8,fp8,0,0.05153599878152212
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,96,1,128,1,float16,float16,0,2.935130755106608
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,96,1,128,1,fp8,fp8,0,3.098479906717936
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,96,1,128,1,float16,fp8,0,2.9346132278442383
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,96,4,128,1,float16,float16,0,3.05511474609375
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,96,4,128,1,float16,fp8,0,3.059541384379069
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,96,4,128,1,fp8,fp8,0,3.175930658976237
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,96,8,128,1,float16,float16,0,3.0639785130818686
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,96,8,128,1,float16,fp8,0,3.0820693969726562
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,96,8,128,1,fp8,fp8,0,3.233274777730306
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,96,1,128,1,float16,float16,0,1.4776372909545898
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,96,1,128,1,float16,fp8,0,1.479680061340332
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,96,96,128,1,float16,float16,0,1.7850027084350586
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,96,96,128,1,float16,fp8,0,1.729365348815918
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,96,1,128,1,fp8,fp8,0,1.499135971069336
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,96,96,128,1,fp8,fp8,0,1.8109386761983235
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,96,4,128,1,float16,float16,0,1.4885600407918294
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,96,4,128,1,float16,fp8,0,1.4909440676371257
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,96,4,128,1,fp8,fp8,0,1.5472639401753743
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,96,8,128,1,float16,float16,0,1.4957226117451985
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,96,8,128,1,float16,fp8,0,1.500501314798991
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,96,8,128,1,fp8,fp8,0,1.566383997599284
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,96,1,128,1,float16,float16,0,0.749567985534668
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,96,96,128,1,float16,float16,0,0.8864426612854004
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,96,96,128,1,float16,fp8,0,0.8656213283538818
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,96,1,128,1,fp8,fp8,0,0.7519360383351644
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,96,96,128,1,fp8,fp8,0,0.9130667050679525
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,96,1,128,1,float16,fp8,0,0.7533226807912191
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,96,4,128,1,float16,float16,0,0.7543466885884603
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,96,4,128,1,float16,fp8,0,0.7560640176137289
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,96,4,128,1,fp8,fp8,0,0.7666292985280355
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,96,8,128,1,float16,float16,0,0.7602293491363525
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,96,8,128,1,float16,fp8,0,0.7574186325073242
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,96,8,128,1,fp8,fp8,0,0.7717546621958414
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,96,96,128,1,float16,float16,0,0.4589279890060425
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,96,96,128,1,float16,fp8,0,0.4480106830596924
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,96,1,128,1,float16,float16,0,0.3882720073064168
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,96,1,128,1,float16,fp8,0,0.38997332255045575
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,96,1,128,1,fp8,fp8,0,0.3882613182067871
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,96,96,128,1,fp8,fp8,0,0.4633599917093913
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,96,4,128,1,float16,float16,0,0.38997332255045575
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,96,4,128,1,float16,fp8,0,0.3906506697336833
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,96,4,128,1,fp8,fp8,0,0.3882666826248169
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,96,8,128,1,float16,float16,0,0.3949226538340251
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,96,8,128,1,float16,fp8,0,0.3945866823196411
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,96,8,128,1,fp8,fp8,0,0.3920160134633382
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,96,96,128,1,float16,float16,0,0.24407466252644858
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,96,96,128,1,float16,fp8,0,0.23415466149648032
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,96,96,128,1,fp8,fp8,0,0.2450773318608602
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,96,1,128,1,float16,float16,0,0.20292266209920248
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,96,1,128,1,float16,fp8,0,0.2063360015551249
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,96,1,128,1,fp8,fp8,0,0.20530666907628378
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,96,4,128,1,float16,float16,0,0.2053333322207133
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,96,4,128,1,float16,fp8,0,0.20545599857966104
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,96,4,128,1,fp8,fp8,0,0.2041226625442505
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,96,8,128,1,float16,float16,0,0.20856000979741415
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,96,8,128,1,float16,fp8,0,0.20923733711242676
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,96,8,128,1,fp8,fp8,0,0.20838399728139242
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,96,96,128,1,float16,float16,0,0.1365333298842112
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,96,96,128,1,fp8,fp8,0,0.1360319952170054
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,96,96,128,1,float16,fp8,0,0.13312533497810364
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,96,1,128,1,float16,float16,0,0.11400533715883891
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,96,1,128,1,float16,fp8,0,0.1129813293615977
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,96,1,128,1,fp8,fp8,0,0.11434666315714519
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,96,4,128,1,float16,float16,0,0.11434666315714519
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,96,4,128,1,float16,fp8,0,0.11434666315714519
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,96,4,128,1,fp8,fp8,0,0.11537599563598633
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,96,8,128,1,float16,float16,0,0.11637333035469055
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,96,8,128,1,float16,fp8,0,0.11708799997965495
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,96,8,128,1,fp8,fp8,0,0.11707199613253276
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,96,96,128,1,float16,float16,0,0.07577066620190938
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,96,96,128,1,float16,fp8,0,0.07509333391984303
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,96,96,128,1,fp8,fp8,0,0.08055466910203297
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,96,1,128,1,float16,float16,0,0.06588266789913177
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,96,1,128,1,float16,fp8,0,0.06484800080458324
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,96,1,128,1,fp8,fp8,0,0.0634933312733968
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,96,4,128,1,float16,float16,0,0.06417066852251689
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,96,4,128,1,float16,fp8,0,0.06451733410358429
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,96,4,128,1,fp8,fp8,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,96,8,128,1,float16,float16,0,0.06588266789913177
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,96,8,128,1,float16,fp8,0,0.06622399886449178
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,96,8,128,1,fp8,fp8,0,0.06417599817117055
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,96,96,128,1,float16,float16,0,0.04574400186538696
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,96,96,128,1,float16,fp8,0,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,96,96,128,1,fp8,fp8,0,0.046426668763160706
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,96,1,128,1,float16,float16,0,0.04368533194065094
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,96,1,128,1,float16,fp8,0,0.04337066908677419
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,96,1,128,1,fp8,fp8,0,0.04164266586303711
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,96,4,128,1,float16,float16,0,0.04369066655635834
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,96,4,128,1,float16,fp8,0,0.04369066655635834
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,96,4,128,1,fp8,fp8,0,0.041637333730856575
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,96,8,128,1,float16,float16,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,96,8,128,1,float16,fp8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,96,8,128,1,fp8,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,96,96,128,1,float16,float16,0,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,96,96,128,1,float16,fp8,0,0.02934933453798294
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,96,96,128,1,fp8,fp8,0,0.02899733434120814
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,96,1,128,1,float16,float16,0,0.029018667836983997
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,96,1,128,1,float16,fp8,0,0.028677334388097126
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,96,1,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,96,4,128,1,float16,float16,0,0.028677334388097126
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,96,4,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,96,4,128,1,fp8,fp8,0,0.027642667293548584
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,96,8,128,1,float16,float16,0,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,96,8,128,1,float16,fp8,0,0.029690665503342945
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,96,8,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,96,1,128,1,float16,float16,0,1.5723466873168945
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,96,1,128,1,fp8,fp8,0,1.557162602742513
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,96,1,128,1,float16,fp8,0,1.571824073791504
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,96,4,128,1,float16,float16,0,1.587706724802653
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,96,4,128,1,float16,fp8,0,1.5842986106872559
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,96,4,128,1,fp8,fp8,0,1.616048018137614
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,96,8,128,1,float16,float16,0,1.593173344930013
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,96,8,128,1,float16,fp8,0,1.5897547403971355
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,96,8,128,1,fp8,fp8,0,1.6795306205749512
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,96,96,128,1,float16,float16,0,0.9357653458913168
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,96,1,128,1,float16,float16,0,0.794111967086792
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,96,96,128,1,float16,fp8,0,0.9123679796854655
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,96,1,128,1,fp8,fp8,0,0.7886506716410319
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,96,96,128,1,fp8,fp8,0,0.9500959714253744
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,96,1,128,1,float16,fp8,0,0.794111967086792
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,96,4,128,1,float16,float16,0,0.7971786657969157
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,96,4,128,1,float16,fp8,0,0.7978560129801432
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,96,4,128,1,fp8,fp8,0,0.7995680173238119
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,96,8,128,1,float16,fp8,0,0.8039999802907308
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,96,8,128,1,float16,float16,0,0.8033333619435629
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,96,8,128,1,fp8,fp8,0,0.812544027964274
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,96,96,128,1,float16,float16,0,0.47974932193756104
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,96,1,128,1,float16,float16,0,0.4078986644744873
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,96,1,128,1,float16,fp8,0,0.40859198570251465
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,96,96,128,1,fp8,fp8,0,0.48794134457906085
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,96,96,128,1,float16,fp8,0,0.4664320151011149
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,96,1,128,1,fp8,fp8,0,0.40414400895436603
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,96,4,128,1,float16,float16,0,0.40619198481241864
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,96,4,128,1,float16,fp8,0,0.4065279960632324
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,96,4,128,1,fp8,fp8,0,0.4054986635843913
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,96,8,128,1,float16,float16,0,0.40994131565093994
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,96,8,128,1,fp8,fp8,0,0.4116479953130086
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,96,8,128,1,float16,fp8,0,0.4092586835225423
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,96,96,128,1,float16,float16,0,0.2505439917246501
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,96,96,128,1,float16,fp8,0,0.244053324063619
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,96,96,128,1,fp8,fp8,0,0.2553173303604126
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,96,1,128,1,float16,float16,0,0.212991992632548
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,96,1,128,1,float16,fp8,0,0.21128533283869425
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,96,1,128,1,fp8,fp8,0,0.2106026609738668
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,96,4,128,1,float16,float16,0,0.2123039960861206
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,96,4,128,1,float16,fp8,0,0.211626668771108
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,96,4,128,1,fp8,fp8,0,0.2129866679509481
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,96,8,128,1,float16,float16,0,0.21504533290863037
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,96,8,128,1,float16,fp8,0,0.21504000822703043
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,96,8,128,1,fp8,fp8,0,0.21741867065429688
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,96,96,128,1,float16,float16,0,0.13482667009035745
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,96,96,128,1,fp8,fp8,0,0.14011200269063315
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,96,1,128,1,float16,float16,0,0.1153706709543864
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,96,96,128,1,float16,fp8,0,0.13345600167910257
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,96,1,128,1,float16,fp8,0,0.1160533328851064
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,96,1,128,1,fp8,fp8,0,0.11537599563598633
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,96,4,128,1,float16,float16,0,0.1153706709543864
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,96,4,128,1,float16,fp8,0,0.11502400040626526
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,96,4,128,1,fp8,fp8,0,0.11741866668065389
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,96,8,128,1,float16,float16,0,0.11776000261306763
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,96,8,128,1,float16,fp8,0,0.11741866668065389
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,96,8,128,1,fp8,fp8,0,0.11980266372362773
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,96,96,128,1,float16,float16,0,0.07543466488520305
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,96,96,128,1,float16,fp8,0,0.0747519979874293
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,96,96,128,1,fp8,fp8,0,0.08295999964078267
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,96,1,128,1,float16,float16,0,0.06657599906126659
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,96,1,128,1,float16,fp8,0,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,96,1,128,1,fp8,fp8,0,0.06417066852251689
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,96,4,128,1,float16,float16,0,0.06622399886449178
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,96,4,128,1,float16,fp8,0,0.06621333460013072
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,96,4,128,1,fp8,fp8,0,0.06383466720581055
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,96,8,128,1,float16,fp8,0,0.06758399804433186
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,96,8,128,1,fp8,fp8,0,0.06553600231806438
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,96,96,128,1,float16,float16,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,96,8,128,1,float16,float16,0,0.06621866424878438
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,96,96,128,1,float16,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,96,96,128,1,fp8,fp8,0,0.04334400097529093
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,96,1,128,1,float16,float16,0,0.04061333338419596
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,96,1,128,1,fp8,fp8,0,0.04095999896526337
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,96,1,128,1,float16,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,96,4,128,1,float16,float16,0,0.041296000281969704
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,96,4,128,1,float16,fp8,0,0.04128533353408178
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,96,4,128,1,fp8,fp8,0,0.04128533353408178
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,96,8,128,1,float16,float16,0,0.04165333261092504
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,96,8,128,1,float16,fp8,0,0.04162133236726125
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,96,8,128,1,fp8,fp8,0,0.041637333730856575
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,96,96,128,1,float16,float16,0,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,96,96,128,1,float16,fp8,0,0.030378667016824085
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,96,96,128,1,fp8,fp8,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,96,1,128,1,float16,float16,0,0.029018667836983997
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,96,1,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,96,1,128,1,fp8,fp8,0,0.028677334388097126
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,96,4,128,1,float16,float16,0,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,96,4,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,96,4,128,1,fp8,fp8,0,0.029018667836983997
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,96,8,128,1,float16,fp8,0,0.030373332401116688
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,96,8,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,96,96,128,1,float16,float16,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,96,96,128,1,float16,fp8,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,96,8,128,1,float16,float16,0,0.02934933453798294
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,96,96,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,96,1,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,96,1,128,1,float16,fp8,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,96,1,128,1,fp8,fp8,0,0.02080533280968666
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,96,4,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,96,4,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,96,4,128,1,fp8,fp8,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,96,8,128,1,float16,float16,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,96,8,128,1,float16,fp8,0,0.021509334444999695
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,96,8,128,1,fp8,fp8,0,0.021157334248224895
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,96,1,128,1,float16,float16,0,1.028608004252116
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,96,1,128,1,float16,fp8,0,1.0292853514353435
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,96,1,128,1,fp8,fp8,0,1.106767972310384
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,96,4,128,1,float16,float16,0,1.034069299697876
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,96,4,128,1,float16,fp8,0,1.035098632176717
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,96,4,128,1,fp8,fp8,0,1.1163360277811687
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,96,8,128,1,float16,float16,0,1.0357813040415447
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,96,8,128,1,float16,fp8,0,1.0398720105489094
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,96,8,128,1,fp8,fp8,0,1.1228053569793701
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,96,96,128,1,float16,float16,0,0.5946026643117269
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,96,96,128,1,float16,fp8,0,0.5826559861501058
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,96,1,128,1,float16,float16,0,0.5232586860656738
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,96,1,128,1,float16,fp8,0,0.5217333237330118
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,96,96,128,1,fp8,fp8,0,0.6442559957504272
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,96,1,128,1,fp8,fp8,0,0.5625173250834147
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,96,4,128,1,float16,float16,0,0.52292267481486
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,96,4,128,1,float16,fp8,0,0.524288018544515
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,96,4,128,1,fp8,fp8,0,0.5618346532185873
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,96,8,128,1,float16,float16,0,0.5273600021998087
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,96,8,128,1,fp8,fp8,0,0.5683199961980184
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,96,8,128,1,float16,fp8,0,0.5246346791585287
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,96,96,128,1,float16,float16,0,0.3080480098724365
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,96,96,128,1,float16,fp8,0,0.3015679915746053
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,96,1,128,1,float16,float16,0,0.2701866626739502
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,96,96,128,1,fp8,fp8,0,0.33058132727940875
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,96,1,128,1,float16,fp8,0,0.2718399961789449
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,96,1,128,1,fp8,fp8,0,0.28859732548395794
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,96,4,128,1,float16,float16,0,0.26982933282852173
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,96,4,128,1,float16,fp8,0,0.2701653242111206
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,96,8,128,1,float16,float16,0,0.27050666014353436
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,96,4,128,1,fp8,fp8,0,0.2892799973487854
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,96,8,128,1,float16,fp8,0,0.2739253242810567
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,96,8,128,1,fp8,fp8,0,0.2916693290074666
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,96,96,128,1,float16,float16,0,0.16554666558901468
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,96,96,128,1,float16,fp8,0,0.159061332543691
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,96,96,128,1,fp8,fp8,0,0.17885865767796835
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,96,1,128,1,float16,float16,0,0.14387200276056925
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,96,1,128,1,float16,fp8,0,0.14455999930699667
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,96,1,128,1,fp8,fp8,0,0.15411200126012167
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,96,4,128,1,float16,float16,0,0.14404799540837607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,96,4,128,1,float16,fp8,0,0.145578662554423
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,96,4,128,1,fp8,fp8,0,0.15427733461062113
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,96,8,128,1,float16,float16,0,0.14643200238545737
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,96,8,128,1,float16,fp8,0,0.14762666821479797
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,96,8,128,1,fp8,fp8,0,0.15803733468055725
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,96,96,128,1,float16,float16,0,0.08942400415738423
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,96,96,128,1,float16,fp8,0,0.08873599767684937
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,96,96,128,1,fp8,fp8,0,0.10069333513577779
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,96,1,128,1,float16,float16,0,0.08054933448632558
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,96,1,128,1,float16,fp8,0,0.0798773318529129
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,96,1,128,1,fp8,fp8,0,0.08328533172607422
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,96,4,128,1,float16,float16,0,0.0795253316561381
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,96,4,128,1,float16,fp8,0,0.07918400069077809
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,96,4,128,1,fp8,fp8,0,0.08328533172607422
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,96,8,128,1,float16,float16,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,96,8,128,1,float16,fp8,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,96,8,128,1,fp8,fp8,0,0.08430932958920796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,96,96,128,1,float16,float16,0,0.04948799808820089
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,96,96,128,1,float16,fp8,0,0.049829334020614624
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,96,96,128,1,fp8,fp8,0,0.054272000988324486
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,96,1,128,1,float16,float16,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,96,1,128,1,float16,fp8,0,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,96,1,128,1,fp8,fp8,0,0.049498667319615684
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,96,4,128,1,float16,float16,0,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,96,4,128,1,float16,fp8,0,0.047781333327293396
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,96,4,128,1,fp8,fp8,0,0.05017599960168203
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,96,8,128,1,float16,float16,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,96,8,128,1,float16,fp8,0,0.04948799808820089
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,96,8,128,1,fp8,fp8,0,0.05156800150871277
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,96,96,128,1,float16,float16,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,96,96,128,1,float16,fp8,0,0.03242666771014532
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,96,96,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,96,1,128,1,float16,float16,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,96,1,128,1,float16,fp8,0,0.031045332551002502
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,96,1,128,1,fp8,fp8,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,96,4,128,1,float16,float16,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,96,4,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,96,4,128,1,fp8,fp8,0,0.03209066639343897
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,96,8,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,96,8,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,96,8,128,1,fp8,fp8,0,0.03311999887228012
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,96,96,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,96,96,128,1,float16,fp8,0,0.024885334074497223
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,96,1,128,1,float16,float16,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,96,1,128,1,float16,fp8,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,96,1,128,1,fp8,fp8,0,0.023215999205907185
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,96,96,128,1,fp8,fp8,0,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,96,4,128,1,float16,float16,0,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,96,4,128,1,float16,fp8,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,96,4,128,1,fp8,fp8,0,0.023893333971500397
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,96,8,128,1,float16,float16,0,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,96,8,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,96,96,128,1,float16,float16,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,96,96,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,96,96,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,96,1,128,1,float16,float16,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,96,1,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,96,8,128,1,fp8,fp8,0,0.023178666830062866
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,96,1,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,96,4,128,1,float16,float16,0,0.017077332983414333
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,96,4,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,96,4,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,96,8,128,1,float16,float16,0,0.018800000349680584
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,96,8,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,96,8,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,96,1,128,1,float16,float16,0,0.8002560138702393
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,96,1,128,1,float16,fp8,0,0.8023040294647217
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,96,1,128,1,fp8,fp8,0,0.889514684677124
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,96,4,128,1,float16,float16,0,0.7995733420054117
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,96,4,128,1,float16,fp8,0,0.8012853463490804
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,96,4,128,1,fp8,fp8,0,0.8891680240631104
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,96,8,128,1,float16,float16,0,0.8040106296539307
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,96,8,128,1,float16,fp8,0,0.8081066608428955
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,96,8,128,1,fp8,fp8,0,0.8980480035146078
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,96,96,128,1,float16,float16,0,0.4456106821695964
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,96,96,128,1,float16,fp8,0,0.439461350440979
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,96,1,128,1,float16,float16,0,0.408896009127299
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,96,96,128,1,fp8,fp8,0,0.49644267559051514
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,96,1,128,1,float16,fp8,0,0.4078986644744873
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,96,1,128,1,fp8,fp8,0,0.45311999320983887
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,96,4,128,1,float16,float16,0,0.40722668170928955
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,96,4,128,1,float16,fp8,0,0.4092586835225423
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,96,4,128,1,fp8,fp8,0,0.4558346668879191
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,96,8,128,1,float16,float16,0,0.4102880160013835
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,96,8,128,1,float16,fp8,0,0.4126666784286499
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,96,8,128,1,fp8,fp8,0,0.4602880080540975
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,96,96,128,1,float16,float16,0,0.2321066657702128
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,96,96,128,1,fp8,fp8,0,0.2604373296101888
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,96,96,128,1,float16,fp8,0,0.2307573358217875
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,96,1,128,1,float16,float16,0,0.21504000822703043
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,96,1,128,1,float16,fp8,0,0.21401600042978922
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,96,1,128,1,fp8,fp8,0,0.2379200061162313
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,96,4,128,1,float16,float16,0,0.21369065841039023
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,96,4,128,1,float16,fp8,0,0.2153493364651998
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,96,4,128,1,fp8,fp8,0,0.23825067281723022
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,96,8,128,1,float16,float16,0,0.21678400039672852
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,96,8,128,1,float16,fp8,0,0.2167466680208842
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,96,8,128,1,fp8,fp8,0,0.24064532915751138
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,96,96,128,1,float16,float16,0,0.12426132957140605
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,96,96,128,1,float16,fp8,0,0.12390399972597758
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,96,96,128,1,fp8,fp8,0,0.1423413356145223
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,96,1,128,1,float16,float16,0,0.11506133278210957
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,96,1,128,1,float16,fp8,0,0.11365866661071777
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,96,1,128,1,fp8,fp8,0,0.12390399972597758
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,96,4,128,1,float16,float16,0,0.11502933502197266
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,96,4,128,1,float16,fp8,0,0.11333866914113362
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,96,4,128,1,fp8,fp8,0,0.12321600317955017
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,96,8,128,1,float16,float16,0,0.11538133025169373
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,96,8,128,1,float16,fp8,0,0.11503466963768005
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,96,8,128,1,fp8,fp8,0,0.12526933352152506
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,96,96,128,1,float16,float16,0,0.06656000018119812
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,96,96,128,1,float16,fp8,0,0.06724266707897186
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,96,96,128,1,fp8,fp8,0,0.07577600081761678
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,96,1,128,1,float16,float16,0,0.06588266789913177
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,96,1,128,1,float16,fp8,0,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,96,1,128,1,fp8,fp8,0,0.06997333467006683
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,96,4,128,1,float16,float16,0,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,96,4,128,1,float16,fp8,0,0.06519466638565063
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,96,8,128,1,float16,float16,0,0.06621866424878438
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,96,8,128,1,float16,fp8,0,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,96,8,128,1,fp8,fp8,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,96,4,128,1,fp8,fp8,0,0.07066133121649425
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,96,96,128,1,float16,float16,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,96,96,128,1,float16,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,96,96,128,1,fp8,fp8,0,0.04370133578777313
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,96,1,128,1,float16,float16,0,0.039936001102129616
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,96,1,128,1,float16,fp8,0,0.039936001102129616
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,96,1,128,1,fp8,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,96,4,128,1,float16,float16,0,0.03994133323431015
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,96,4,128,1,float16,fp8,0,0.03993066648642222
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,96,4,128,1,fp8,fp8,0,0.04266666869322459
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,96,8,128,1,float16,float16,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,96,8,128,1,float16,fp8,0,0.04128533353408178
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,96,8,128,1,fp8,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,96,96,128,1,float16,float16,0,0.027989332874615986
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,96,96,128,1,float16,fp8,0,0.029029332101345062
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,96,96,128,1,fp8,fp8,0,0.030031998952229817
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,96,1,128,1,float16,float16,0,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,96,1,128,1,float16,fp8,0,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,96,1,128,1,fp8,fp8,0,0.028325334191322327
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,96,4,128,1,float16,float16,0,0.02697066714366277
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,96,4,128,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,96,4,128,1,fp8,fp8,0,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,96,8,128,1,float16,float16,0,0.027674667537212372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,96,8,128,1,float16,fp8,0,0.02867199977238973
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,96,8,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,96,96,128,1,float16,float16,0,0.020810666183630627
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,96,96,128,1,float16,fp8,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,96,96,128,1,fp8,fp8,0,0.022858666876951855
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,96,1,128,1,float16,float16,0,0.02184533327817917
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,96,1,128,1,float16,fp8,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,96,1,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,96,4,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,96,4,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,96,4,128,1,fp8,fp8,0,0.020831999679406483
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,96,8,128,1,float16,float16,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,96,8,128,1,float16,fp8,0,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,96,8,128,1,fp8,fp8,0,0.022885332504908245
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,96,96,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,96,96,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,96,96,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,96,1,128,1,float16,float16,0,0.018090666582187016
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,96,1,128,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,96,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,96,4,128,1,float16,float16,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,96,4,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,96,4,128,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,96,8,128,1,float16,float16,0,0.017743999759356182
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,96,8,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,96,8,128,1,fp8,fp8,0,0.018090666582187016
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,96,1,128,1,float16,float16,0,0.7000640233357748
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,96,1,128,1,fp8,fp8,0,0.787285327911377
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,96,1,128,1,float16,fp8,0,0.7024640242258707
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,96,4,128,1,float16,float16,0,0.7034880320231119
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,96,4,128,1,float16,fp8,0,0.7055359681447347
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,96,4,128,1,fp8,fp8,0,0.7889920075734457
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,96,8,128,1,float16,float16,0,0.7079253196716309
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,96,8,128,1,float16,fp8,0,0.7082666556040446
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,96,8,128,1,fp8,fp8,0,0.7934292952219645
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,96,96,128,1,float16,float16,0,0.3800746599833171
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,96,96,128,1,fp8,fp8,0,0.42683732509613037
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,96,96,128,1,float16,fp8,0,0.37460800011952716
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,96,1,128,1,float16,float16,0,0.36130134264628094
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,96,1,128,1,float16,fp8,0,0.3582346836725871
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,96,1,128,1,fp8,fp8,0,0.40379734834035236
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,96,4,128,1,float16,float16,0,0.3595946629842122
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,96,4,128,1,float16,fp8,0,0.3609653313954671
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,96,8,128,1,float16,float16,0,0.360975980758667
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,96,4,128,1,fp8,fp8,0,0.40584532419840497
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,96,8,128,1,float16,fp8,0,0.3633493185043335
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,96,8,128,1,fp8,fp8,0,0.4072106679280599
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,96,96,128,1,float16,float16,0,0.19868266582489014
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,96,96,128,1,float16,fp8,0,0.19729600350062051
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,96,1,128,1,float16,float16,0,0.18926932414372763
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,96,96,128,1,fp8,fp8,0,0.22494399547576904
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,96,1,128,1,float16,fp8,0,0.18943999210993448
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,96,1,128,1,fp8,fp8,0,0.20684800545374551
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,96,4,128,1,float16,float16,0,0.1872373421986898
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,96,4,128,1,float16,fp8,0,0.18653867642084757
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,96,4,128,1,fp8,fp8,0,0.20667733748753866
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,96,8,128,1,float16,float16,0,0.18774400154749551
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,96,8,128,1,float16,fp8,0,0.18807466824849448
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,96,8,128,1,fp8,fp8,0,0.20718934138615927
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,96,96,128,1,float16,float16,0,0.10478933652242024
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,96,96,128,1,float16,fp8,0,0.10412266850471497
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,96,96,128,1,fp8,fp8,0,0.11708266536394756
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,96,1,128,1,float16,float16,0,0.10308266679445903
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,96,1,128,1,float16,fp8,0,0.10240000486373901
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,96,1,128,1,fp8,fp8,0,0.11230400204658508
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,96,4,128,1,float16,float16,0,0.10102933645248413
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,96,4,128,1,float16,fp8,0,0.10103467106819153
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,96,4,128,1,fp8,fp8,0,0.1129813293615977
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,96,8,128,1,float16,float16,0,0.10274133086204529
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,96,8,128,1,float16,fp8,0,0.10308266679445903
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,96,8,128,1,fp8,fp8,0,0.11366400122642517
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,96,96,128,1,float16,float16,0,0.05972800155480703
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,96,96,128,1,float16,fp8,0,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,96,96,128,1,fp8,fp8,0,0.06589333216349284
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,96,1,128,1,float16,float16,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,96,1,128,1,float16,fp8,0,0.05836800237496694
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,96,1,128,1,fp8,fp8,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,96,4,128,1,float16,float16,0,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,96,4,128,1,float16,fp8,0,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,96,4,128,1,fp8,fp8,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,96,8,128,1,float16,float16,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,96,8,128,1,float16,fp8,0,0.06005866825580597
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,96,8,128,1,fp8,fp8,0,0.06452799836794536
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,96,96,128,1,float16,float16,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,96,96,128,1,float16,fp8,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,96,96,128,1,fp8,fp8,0,0.041306667029857635
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,96,1,128,1,float16,float16,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,96,1,128,1,float16,fp8,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,96,1,128,1,fp8,fp8,0,0.03925866633653641
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,96,4,128,1,float16,float16,0,0.03719999889532725
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,96,4,128,1,float16,fp8,0,0.0365280012289683
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,96,4,128,1,fp8,fp8,0,0.039274667700131737
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,96,8,128,1,float16,float16,0,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,96,8,128,1,float16,fp8,0,0.03822933385769526
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,96,8,128,1,fp8,fp8,0,0.03959999978542328
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,96,96,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,96,96,128,1,float16,fp8,0,0.0249439999461174
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,96,96,128,1,fp8,fp8,0,0.02699200063943863
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,96,1,128,1,float16,float16,0,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,96,1,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,96,1,128,1,fp8,fp8,0,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,96,4,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,96,4,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,96,4,128,1,fp8,fp8,0,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,96,8,128,1,float16,float16,0,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,96,8,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,96,96,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,96,96,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,96,96,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,96,1,128,1,float16,float16,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,96,8,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,96,1,128,1,float16,fp8,0,0.021503999829292297
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,96,1,128,1,fp8,fp8,0,0.020479999482631683
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,96,4,128,1,float16,float16,0,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,96,4,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,96,4,128,1,fp8,fp8,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,96,8,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,96,8,128,1,float16,fp8,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,96,8,128,1,fp8,fp8,0,0.021498667697111767
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,96,96,128,1,float16,float16,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,96,96,128,1,float16,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,96,96,128,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,96,1,128,1,float16,float16,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,96,1,128,1,float16,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,96,1,128,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,96,4,128,1,float16,float16,0,0.016714667280515034
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,96,4,128,1,float16,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,96,4,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,96,8,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,96,8,128,1,float16,fp8,0,0.01775466650724411
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,96,8,128,1,fp8,fp8,0,0.017749333133300144
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,64,1,128,1,float16,float16,0,31.11851755777995
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,64,1,128,1,float16,fp8,0,31.30352020263672
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,64,1,128,1,fp8,fp8,0,23.981908162434895
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,64,2,128,1,float16,float16,0,31.267003377278645
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,64,2,128,1,float16,fp8,0,31.719940185546875
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,64,2,128,1,fp8,fp8,0,24.369834899902344
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,64,4,128,1,float16,float16,0,31.146502176920574
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,64,4,128,1,float16,fp8,0,30.945627848307293
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,64,4,128,1,fp8,fp8,0,24.268282572428387
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,64,8,128,1,float16,float16,0,31.302825927734375
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,64,8,128,1,float16,fp8,0,31.769775390625
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,64,1,128,1,float16,float16,0,15.562751770019531
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,64,8,128,1,fp8,fp8,0,24.035504659016926
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,64,1,128,1,fp8,fp8,0,12.075519561767578
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,64,1,128,1,float16,fp8,0,15.593301137288412
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,64,64,128,1,float16,float16,0,15.906475067138672
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,64,2,128,1,float16,float16,0,15.93514633178711
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,64,2,128,1,float16,fp8,0,15.82028834025065
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,64,2,128,1,fp8,fp8,0,12.099754333496094
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,64,64,128,1,fp8,fp8,0,12.65237299601237
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,64,64,128,1,float16,fp8,0,16.20343526204427
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,64,4,128,1,float16,float16,0,15.771476745605469
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,64,4,128,1,fp8,fp8,0,12.083029429117838
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,64,4,128,1,float16,fp8,0,15.775216420491537
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,64,8,128,1,float16,float16,0,15.71993637084961
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,64,8,128,1,float16,fp8,0,15.564287821451822
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,64,8,128,1,fp8,fp8,0,12.132155100504557
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,64,1,128,1,float16,float16,0,7.7445119222005205
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,64,1,128,1,float16,fp8,0,8.03653335571289
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,64,1,128,1,fp8,fp8,0,6.2798506418863935
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,64,64,128,1,float16,fp8,0,7.642447789510091
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,64,64,128,1,float16,float16,0,8.045733133951822
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,64,2,128,1,float16,float16,0,7.703210830688477
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,64,64,128,1,fp8,fp8,0,6.491647720336914
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,64,2,128,1,fp8,fp8,0,6.269269307454427
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,64,2,128,1,float16,fp8,0,8.018085479736328
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,64,4,128,1,float16,float16,0,7.561920166015625
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,64,4,128,1,float16,fp8,0,7.829173405965169
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,64,4,128,1,fp8,fp8,0,6.286853154500325
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,64,8,128,1,float16,float16,0,7.904773076375325
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,64,1,128,1,float16,float16,0,3.975152015686035
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,64,1,128,1,float16,fp8,0,3.967162768046061
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,64,8,128,1,float16,fp8,0,7.937034606933594
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,64,1,128,1,fp8,fp8,0,3.3875786463419595
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,64,8,128,1,fp8,fp8,0,6.296911875406901
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,64,64,128,1,float16,float16,0,4.043456077575684
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,64,64,128,1,float16,fp8,0,4.142266591389974
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,64,2,128,1,float16,float16,0,4.039344151814778
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,64,2,128,1,float16,fp8,0,3.865088144938151
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,64,2,128,1,fp8,fp8,0,3.381765365600586
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,64,64,128,1,fp8,fp8,0,3.4897918701171875
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,64,4,128,1,float16,float16,0,3.9360745747884116
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,64,4,128,1,float16,fp8,0,3.922778765360514
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,64,4,128,1,fp8,fp8,0,3.3875627517700195
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,64,8,128,1,float16,float16,0,3.948725382486979
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,64,8,128,1,float16,fp8,0,4.055738766988118
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,64,8,128,1,fp8,fp8,0,3.3988161087036133
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,64,1,128,1,float16,float16,0,17.93177032470703
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,64,1,128,1,fp8,fp8,0,14.238890329996744
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,64,1,128,1,float16,fp8,0,17.85974884033203
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,64,2,128,1,float16,float16,0,18.17668279012044
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,64,2,128,1,float16,fp8,0,18.308773040771484
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,64,2,128,1,fp8,fp8,0,14.261418660481771
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,64,4,128,1,float16,float16,0,17.965738932291668
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,64,4,128,1,float16,fp8,0,18.19101842244466
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,64,1,128,1,float16,float16,0,9.02450688680013
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,64,4,128,1,fp8,fp8,0,14.233088175455729
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,64,8,128,1,float16,float16,0,18.49191411336263
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,64,8,128,1,fp8,fp8,0,14.336507161458334
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,64,8,128,1,float16,fp8,0,18.54038365681966
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,64,1,128,1,float16,fp8,0,9.032032012939453
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,64,1,128,1,fp8,fp8,0,7.25111452738444
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,64,64,128,1,float16,fp8,0,9.730560302734375
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,64,64,128,1,float16,float16,0,9.287514368693033
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,64,64,128,1,fp8,fp8,0,7.681701024373372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,64,2,128,1,float16,fp8,0,8.960853576660156
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,64,2,128,1,fp8,fp8,0,7.26852289835612
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,64,2,128,1,float16,float16,0,9.038517634073893
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,64,4,128,1,float16,float16,0,9.200133641560873
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,64,4,128,1,float16,fp8,0,8.954197565714518
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,64,4,128,1,fp8,fp8,0,7.271584192911784
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,64,8,128,1,float16,float16,0,8.856074651082357
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,64,8,128,1,float16,fp8,0,9.337514877319336
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,64,1,128,1,float16,float16,0,4.555962562561035
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,64,8,128,1,fp8,fp8,0,7.299584070841472
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,64,1,128,1,float16,fp8,0,4.498613357543945
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,64,1,128,1,fp8,fp8,0,3.81388250986735
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,64,2,128,1,float16,float16,0,4.577808062235515
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,64,64,128,1,float16,float16,0,4.772879918416341
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,64,64,128,1,float16,fp8,0,4.780725479125977
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,64,64,128,1,fp8,fp8,0,3.9818239212036133
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,64,2,128,1,float16,fp8,0,4.398922602335612
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,64,2,128,1,fp8,fp8,0,3.8227787017822266
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,64,4,128,1,float16,float16,0,4.526938756306966
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,64,4,128,1,float16,fp8,0,4.492469469706218
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,64,4,128,1,fp8,fp8,0,3.8292531967163086
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,64,8,128,1,float16,float16,0,4.595360120137532
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,64,8,128,1,fp8,fp8,0,3.8336801528930664
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,64,8,128,1,float16,fp8,0,4.639589309692383
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,64,1,128,1,float16,float16,0,2.3545173009236655
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,64,1,128,1,float16,fp8,0,2.386085351308187
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,64,1,128,1,fp8,fp8,0,2.0981760025024414
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,64,2,128,1,float16,float16,0,2.3555466334025064
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,64,2,128,1,float16,fp8,0,2.369365374247233
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,64,64,128,1,float16,float16,0,2.428069273630778
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,64,64,128,1,float16,fp8,0,2.4659627278645835
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,64,64,128,1,fp8,fp8,0,2.1705387433369956
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,64,2,128,1,fp8,fp8,0,2.0937387148539224
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,64,4,128,1,float16,float16,0,2.3881386121114097
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,64,4,128,1,float16,fp8,0,2.3569067319234214
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,64,4,128,1,fp8,fp8,0,2.101583957672119
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,64,8,128,1,float16,fp8,0,2.3727466265360513
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,64,8,128,1,fp8,fp8,0,2.106368064880371
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,64,8,128,1,float16,float16,0,2.370031992594401
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,64,1,128,1,float16,float16,0,13.144234975179037
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,64,1,128,1,float16,fp8,0,12.984666188557943
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,64,1,128,1,fp8,fp8,0,10.328234354654947
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,64,2,128,1,float16,float16,0,13.361829121907553
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,64,2,128,1,float16,fp8,0,13.384709676106771
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,64,2,128,1,fp8,fp8,0,10.327893575032553
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,64,4,128,1,float16,float16,0,13.212533315022787
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,64,4,128,1,float16,fp8,0,13.26626714070638
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,64,1,128,1,float16,float16,0,6.415706634521484
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,64,4,128,1,fp8,fp8,0,10.33949343363444
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,64,8,128,1,float16,float16,0,13.188772837320963
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,64,8,128,1,float16,fp8,0,12.969130198160807
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,64,8,128,1,fp8,fp8,0,10.398042678833008
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,64,1,128,1,float16,fp8,0,6.532442728678386
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,64,64,128,1,float16,float16,0,6.7176157633463545
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,64,64,128,1,float16,fp8,0,6.735701243082683
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,64,1,128,1,fp8,fp8,0,5.3034772872924805
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,64,64,128,1,fp8,fp8,0,5.600255966186523
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,64,2,128,1,float16,float16,0,6.3928267161051435
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,64,2,128,1,float16,fp8,0,6.569813410441081
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,64,2,128,1,fp8,fp8,0,5.31711991628011
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,64,4,128,1,float16,float16,0,6.614186604817708
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,64,4,128,1,float16,fp8,0,6.15170160929362
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,64,4,128,1,fp8,fp8,0,5.314048131306966
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,64,8,128,1,float16,float16,0,6.2226613362630205
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,64,8,128,1,float16,fp8,0,6.5206451416015625
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,64,1,128,1,float16,float16,0,3.17252254486084
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,64,8,128,1,fp8,fp8,0,5.327018737792969
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,64,1,128,1,float16,fp8,0,3.2380641301472983
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,64,1,128,1,fp8,fp8,0,2.808490753173828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,64,64,128,1,float16,float16,0,3.351381301879883
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,64,64,128,1,float16,fp8,0,3.327317237854004
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,64,2,128,1,float16,float16,0,3.2172587712605796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,64,64,128,1,fp8,fp8,0,2.947413444519043
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,64,2,128,1,float16,fp8,0,3.1848106384277344
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,64,2,128,1,fp8,fp8,0,2.809856096903483
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,64,4,128,1,float16,float16,0,3.2049547831217446
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,64,4,128,1,fp8,fp8,0,2.822138786315918
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,64,4,128,1,float16,fp8,0,3.2274932861328125
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,64,8,128,1,float16,float16,0,3.2240800857543945
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,64,8,128,1,float16,fp8,0,3.2333014806111655
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,64,1,128,1,float16,float16,0,1.7577120463053386
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,64,1,128,1,float16,fp8,0,1.739946683247884
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,64,8,128,1,fp8,fp8,0,2.82590389251709
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,64,1,128,1,fp8,fp8,0,1.5650134086608887
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,64,64,128,1,float16,float16,0,1.7877333958943684
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,64,2,128,1,float16,float16,0,1.7522346178690593
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,64,64,128,1,float16,fp8,0,1.8123253186543782
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,64,64,128,1,fp8,fp8,0,1.6238880157470703
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,64,2,128,1,fp8,fp8,0,1.561946709950765
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,64,2,128,1,float16,fp8,0,1.7628159523010254
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,64,4,128,1,float16,float16,0,1.7443893750508626
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,64,4,128,1,float16,fp8,0,1.739258607228597
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,64,4,128,1,fp8,fp8,0,1.569109280904134
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,64,8,128,1,float16,float16,0,1.7580374081929524
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,64,8,128,1,float16,fp8,0,1.750186602274577
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,64,8,128,1,fp8,fp8,0,1.567402680714925
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,64,1,128,1,float16,fp8,0,17.907541910807293
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,64,1,128,1,float16,float16,0,17.44418716430664
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,64,1,128,1,fp8,fp8,0,13.960880279541016
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,64,2,128,1,float16,float16,0,17.7729008992513
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,64,2,128,1,float16,fp8,0,17.430885314941406
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,64,2,128,1,fp8,fp8,0,13.9772580464681
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,64,4,128,1,float16,fp8,0,17.76708221435547
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,64,4,128,1,float16,float16,0,17.323871612548828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,64,1,128,1,float16,float16,0,8.684549331665039
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,64,4,128,1,fp8,fp8,0,14.012415568033854
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,64,1,128,1,float16,fp8,0,8.632319768269857
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,64,8,128,1,float16,float16,0,17.389563242594402
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,64,8,128,1,fp8,fp8,0,14.037818908691406
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,64,8,128,1,float16,fp8,0,17.649664560953777
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,64,64,128,1,float16,float16,0,8.804037094116211
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,64,64,128,1,float16,fp8,0,9.441450754801432
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,64,64,128,1,fp8,fp8,0,7.520938873291016
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,64,1,128,1,fp8,fp8,0,7.068330764770508
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,64,2,128,1,float16,float16,0,8.72432009379069
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,64,2,128,1,float16,fp8,0,8.648533503214518
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,64,2,128,1,fp8,fp8,0,7.071055730183919
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,64,4,128,1,float16,float16,0,8.802138646443685
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,64,4,128,1,float16,fp8,0,8.683706919352213
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,64,4,128,1,fp8,fp8,0,7.088645299275716
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,64,8,128,1,float16,float16,0,8.666117350260416
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,64,8,128,1,float16,fp8,0,8.929621378580729
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,64,1,128,1,float16,float16,0,4.149584134419759
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,64,8,128,1,fp8,fp8,0,7.0999094645182295
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,64,1,128,1,float16,fp8,0,4.272986729939778
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,64,64,128,1,float16,float16,0,4.459018707275391
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,64,1,128,1,fp8,fp8,0,3.6474879582722983
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,64,2,128,1,float16,float16,0,4.406965255737305
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,64,64,128,1,float16,fp8,0,4.382554690043132
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,64,64,128,1,fp8,fp8,0,3.8876161575317383
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,64,2,128,1,float16,fp8,0,4.297904014587402
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,64,2,128,1,fp8,fp8,0,3.651583989461263
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,64,4,128,1,float16,float16,0,4.19535477956136
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,64,4,128,1,float16,fp8,0,4.307813326517741
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,64,4,128,1,fp8,fp8,0,3.6587467193603516
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,64,8,128,1,float16,float16,0,4.349621454874675
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,64,8,128,1,float16,fp8,0,4.3692372639973955
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,64,1,128,1,float16,float16,0,2.179749329884847
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,64,8,128,1,fp8,fp8,0,3.67906125386556
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,64,1,128,1,float16,fp8,0,2.1917014122009277
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,64,1,128,1,fp8,fp8,0,1.9570293426513672
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,64,64,128,1,float16,float16,0,2.2768640518188477
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,64,2,128,1,float16,float16,0,2.1800692876180015
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,64,64,128,1,float16,fp8,0,2.306389331817627
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,64,64,128,1,fp8,fp8,0,2.0616532961527505
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,64,2,128,1,float16,fp8,0,2.187946637471517
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,64,2,128,1,fp8,fp8,0,1.9553279876708984
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,64,4,128,1,float16,float16,0,2.192042668660482
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,64,4,128,1,float16,fp8,0,2.219007968902588
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,64,4,128,1,fp8,fp8,0,1.9594240188598633
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,64,8,128,1,float16,float16,0,2.186229387919108
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,64,8,128,1,float16,fp8,0,2.1988693873087564
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,64,8,128,1,fp8,fp8,0,1.9640320142110188
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,64,1,128,1,float16,float16,0,1.2233386834462483
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,64,64,128,1,float16,float16,0,1.2542239824930828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,64,1,128,1,float16,fp8,0,1.2124213377634685
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,64,1,128,1,fp8,fp8,0,1.106112003326416
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,64,2,128,1,float16,float16,0,1.2127892971038818
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,64,64,128,1,fp8,fp8,0,1.155237356821696
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,64,2,128,1,float16,fp8,0,1.2243680159250896
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,64,64,128,1,float16,fp8,0,1.2726613680521648
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,64,4,128,1,float16,float16,0,1.2134453455607097
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,64,4,128,1,fp8,fp8,0,1.1077813307444255
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,64,4,128,1,float16,fp8,0,1.2158293724060059
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,64,2,128,1,fp8,fp8,0,1.1071146329243977
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,64,8,128,1,float16,float16,0,1.2158293724060059
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,64,8,128,1,float16,fp8,0,1.224021355311076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,64,8,128,1,fp8,fp8,0,1.1091466744740803
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,64,1,128,1,float16,float16,0,10.427759806315104
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,64,1,128,1,fp8,fp8,0,8.67037836710612
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,64,1,128,1,float16,fp8,0,10.312874476114908
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,64,2,128,1,float16,float16,0,10.549589157104492
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,64,2,128,1,fp8,fp8,0,8.677205403645834
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,64,2,128,1,float16,fp8,0,10.718048095703125
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,64,4,128,1,float16,float16,0,10.439343770345053
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,64,4,128,1,float16,fp8,0,10.462544123331705
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,64,1,128,1,float16,float16,0,5.225306510925293
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,64,4,128,1,fp8,fp8,0,8.714431762695312
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,64,1,128,1,float16,fp8,0,5.033130645751953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,64,8,128,1,float16,float16,0,10.444117228190104
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,64,8,128,1,float16,fp8,0,10.772324879964193
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,64,8,128,1,fp8,fp8,0,8.748218536376953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,64,64,128,1,float16,float16,0,5.462720235188802
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,64,64,128,1,fp8,fp8,0,4.767392158508301
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,64,64,128,1,float16,fp8,0,5.512522379557292
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,64,1,128,1,fp8,fp8,0,4.421626726786296
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,64,2,128,1,float16,float16,0,5.180933316548665
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,64,2,128,1,float16,fp8,0,5.070677439371745
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,64,2,128,1,fp8,fp8,0,4.41753609975179
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,64,4,128,1,float16,float16,0,5.211642583211263
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,64,4,128,1,float16,fp8,0,5.102085431416829
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,64,4,128,1,fp8,fp8,0,4.429493268330892
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,64,8,128,1,float16,float16,0,5.303807894388835
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,64,8,128,1,float16,fp8,0,5.310970624287923
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,64,1,128,1,float16,float16,0,2.5601706504821777
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,64,8,128,1,fp8,fp8,0,4.4603573481241865
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,64,1,128,1,float16,fp8,0,2.567333380381266
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,64,1,128,1,fp8,fp8,0,2.310826619466146
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,64,64,128,1,float16,float16,0,2.7098401387532554
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,64,64,128,1,float16,fp8,0,2.7455199559529624
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,64,64,128,1,fp8,fp8,0,2.478922684987386
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,64,2,128,1,float16,float16,0,2.572810649871826
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,64,2,128,1,float16,fp8,0,2.5704053243001304
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,64,2,128,1,fp8,fp8,0,2.3091252644856772
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,64,4,128,1,float16,float16,0,2.5775465965270996
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,64,4,128,1,float16,fp8,0,2.610688050587972
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,64,4,128,1,fp8,fp8,0,2.312533378601074
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,64,8,128,1,float16,float16,0,2.5782666206359863
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,64,8,128,1,float16,fp8,0,2.58133872350057
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,64,1,128,1,float16,float16,0,1.3864960670471191
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,64,1,128,1,float16,fp8,0,1.36737060546875
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,64,8,128,1,fp8,fp8,0,2.3268799781799316
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,64,1,128,1,fp8,fp8,0,1.249626636505127
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,64,64,128,1,float16,fp8,0,1.4711519877115886
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,64,64,128,1,float16,float16,0,1.443669319152832
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,64,2,128,1,float16,float16,0,1.3687465985616047
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,64,2,128,1,fp8,fp8,0,1.2499626477559407
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,64,64,128,1,fp8,fp8,0,1.328816016515096
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,64,4,128,1,float16,float16,0,1.377285321553548
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,64,2,128,1,float16,fp8,0,1.392298698425293
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,64,4,128,1,float16,fp8,0,1.376922607421875
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,64,4,128,1,fp8,fp8,0,1.250991980234782
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,64,8,128,1,float16,float16,0,1.377621332804362
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,64,8,128,1,float16,fp8,0,1.3779412905375164
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,64,1,128,1,float16,float16,0,0.7782346407572428
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,64,8,128,1,fp8,fp8,0,1.2583200136820476
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,64,1,128,1,float16,fp8,0,0.7835360368092855
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,64,64,128,1,float16,float16,0,0.8104906876881918
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,64,1,128,1,fp8,fp8,0,0.7212373415629069
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,64,64,128,1,float16,fp8,0,0.8244907061258951
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,64,2,128,1,float16,float16,0,0.7782346407572428
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,64,2,128,1,fp8,fp8,0,0.7229440212249756
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,64,2,128,1,float16,fp8,0,0.7813066641489664
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,64,64,128,1,fp8,fp8,0,0.7618666489919027
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,64,4,128,1,float16,fp8,0,0.7842079798380533
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,64,4,128,1,float16,float16,0,0.7848959763844808
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,64,4,128,1,fp8,fp8,0,0.7236266930898031
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,64,8,128,1,float16,float16,0,0.7842133045196533
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,64,8,128,1,float16,fp8,0,0.7852319876352946
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,64,8,128,1,fp8,fp8,0,0.726698637008667
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,64,1,128,1,float16,float16,0,10.705066680908203
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,64,1,128,1,fp8,fp8,0,9.28769048055013
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,64,1,128,1,float16,fp8,0,10.867552439371744
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,64,2,128,1,float16,fp8,0,10.871653238932291
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,64,2,128,1,float16,float16,0,10.663263956705729
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,64,2,128,1,fp8,fp8,0,9.335295995076498
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,64,4,128,1,float16,float16,0,10.775039672851562
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,64,4,128,1,float16,fp8,0,10.6734987894694
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,64,1,128,1,float16,float16,0,5.221546808878581
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,64,4,128,1,fp8,fp8,0,9.308501561482748
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,64,1,128,1,float16,fp8,0,5.190661430358887
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,64,8,128,1,float16,float16,0,11.021653493245443
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,64,8,128,1,fp8,fp8,0,9.363957087198893
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,64,8,128,1,float16,fp8,0,11.165696461995443
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,64,64,128,1,float16,fp8,0,5.734912236531575
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,64,64,128,1,float16,float16,0,5.770074844360352
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,64,64,128,1,fp8,fp8,0,5.117781321207683
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,64,1,128,1,fp8,fp8,0,4.654938697814941
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,64,2,128,1,float16,float16,0,5.275487899780273
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,64,2,128,1,float16,fp8,0,5.192186673482259
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,64,2,128,1,fp8,fp8,0,4.661397298177083
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,64,4,128,1,float16,float16,0,5.178544044494629
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,64,4,128,1,float16,fp8,0,5.28435738881429
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,64,4,128,1,fp8,fp8,0,4.694197336832683
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,64,8,128,1,float16,float16,0,5.2816267013549805
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,64,8,128,1,float16,fp8,0,5.251920064290364
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,64,8,128,1,fp8,fp8,0,4.711599985758464
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,64,1,128,1,float16,float16,0,2.5946666399637857
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,64,1,128,1,float16,fp8,0,2.612053394317627
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,64,64,128,1,float16,float16,0,2.812586784362793
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,64,1,128,1,fp8,fp8,0,2.399402618408203
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,64,2,128,1,float16,float16,0,2.5905493100484214
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,64,64,128,1,float16,fp8,0,2.877786636352539
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,64,2,128,1,float16,fp8,0,2.5830400784810386
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,64,2,128,1,fp8,fp8,0,2.408970673878988
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,64,64,128,1,fp8,fp8,0,2.6477227210998535
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,64,4,128,1,float16,float16,0,2.624517281850179
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,64,4,128,1,float16,fp8,0,2.6079840660095215
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,64,4,128,1,fp8,fp8,0,2.4082825978597007
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,64,8,128,1,float16,float16,0,2.609658718109131
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,64,8,128,1,float16,fp8,0,2.620757261912028
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,64,1,128,1,float16,float16,0,1.3591945966084797
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,64,8,128,1,fp8,fp8,0,2.4208693504333496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,64,1,128,1,float16,fp8,0,1.3557813962300618
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,64,64,128,1,float16,float16,0,1.4617600440979004
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,64,1,128,1,fp8,fp8,0,1.262074629465739
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,64,2,128,1,float16,float16,0,1.3598720232645671
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,64,2,128,1,float16,fp8,0,1.3619306882222493
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,64,64,128,1,float16,fp8,0,1.499824047088623
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,64,2,128,1,fp8,fp8,0,1.2651466528574626
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,64,64,128,1,fp8,fp8,0,1.3820533752441406
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,64,4,128,1,float16,float16,0,1.3595306078592937
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,64,4,128,1,float16,fp8,0,1.3683946927388508
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,64,4,128,1,fp8,fp8,0,1.2695893446604412
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,64,8,128,1,float16,float16,0,1.3711360295613606
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,64,8,128,1,float16,fp8,0,1.376255989074707
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,64,1,128,1,float16,float16,0,0.7447840372721354
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,64,8,128,1,fp8,fp8,0,1.2849280039469402
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,64,1,128,1,float16,fp8,0,0.7447946866353353
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,64,64,128,1,float16,float16,0,0.7879733244578043
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,64,64,128,1,fp8,fp8,0,0.7533226807912191
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,64,1,128,1,fp8,fp8,0,0.7000746726989746
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,64,64,128,1,float16,fp8,0,0.8060586452484131
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,64,2,128,1,float16,float16,0,0.7468159993489584
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,64,2,128,1,float16,fp8,0,0.7488799889882406
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,64,4,128,1,float16,fp8,0,0.7485439777374268
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,64,4,128,1,float16,float16,0,0.7465066909790039
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,64,2,128,1,fp8,fp8,0,0.702122688293457
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,64,4,128,1,fp8,fp8,0,0.7028000354766846
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,64,8,128,1,float16,float16,0,0.7495733102162679
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,64,8,128,1,float16,fp8,0,0.7567306359608968
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,64,8,128,1,fp8,fp8,0,0.7072532971700033
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,64,1,128,1,float16,fp8,0,0.4374186595280965
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,64,1,128,1,fp8,fp8,0,0.41557331879933673
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,64,1,128,1,float16,float16,0,0.44151465098063153
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,64,2,128,1,float16,float16,0,0.4387893279393514
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,64,64,128,1,float16,float16,0,0.4596000115076701
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,64,64,128,1,float16,fp8,0,0.46745598316192627
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,64,64,128,1,fp8,fp8,0,0.44253333409627277
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,64,2,128,1,float16,fp8,0,0.44049068291982013
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,64,2,128,1,fp8,fp8,0,0.41678400834401447
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,64,4,128,1,float16,float16,0,0.44049068291982013
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,64,4,128,1,fp8,fp8,0,0.4170986811319987
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,64,8,128,1,float16,float16,0,0.44356266657511395
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,64,8,128,1,fp8,fp8,0,0.4193280140558879
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,64,8,128,1,float16,fp8,0,0.4452693462371826
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,64,4,128,1,float16,fp8,0,0.4456160068511963
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,64,1,128,1,float16,float16,0,6.70907719930013
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,64,1,128,1,float16,fp8,0,6.677674611409505
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,64,1,128,1,fp8,fp8,0,6.179157257080078
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,64,2,128,1,float16,fp8,0,6.8049971262613935
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,64,2,128,1,float16,float16,0,6.808240254720052
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,64,2,128,1,fp8,fp8,0,6.2016855875651045
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,64,4,128,1,float16,float16,0,6.779738744099935
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,64,4,128,1,float16,fp8,0,6.793541590372722
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,64,1,128,1,float16,float16,0,3.3327840169270835
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,64,4,128,1,fp8,fp8,0,6.238208134969075
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,64,8,128,1,float16,float16,0,6.919850667317708
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,64,8,128,1,float16,fp8,0,6.679728190104167
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,64,8,128,1,fp8,fp8,0,6.288042704264323
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,64,1,128,1,float16,fp8,0,3.338581403096517
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,64,64,128,1,float16,float16,0,3.620527903238932
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,64,64,128,1,float16,fp8,0,3.6259787877400718
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,64,64,128,1,fp8,fp8,0,3.4556585947672525
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,64,1,128,1,fp8,fp8,0,3.126789410909017
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,64,2,128,1,float16,float16,0,3.2880640029907227
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,64,2,128,1,float16,fp8,0,3.3402878443400064
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,64,2,128,1,fp8,fp8,0,3.142826716105143
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,64,4,128,1,float16,float16,0,3.3624852498372397
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,64,4,128,1,float16,fp8,0,3.319808006286621
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,64,4,128,1,fp8,fp8,0,3.13429323832194
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,64,8,128,1,float16,float16,0,3.309242566426595
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,64,8,128,1,float16,fp8,0,3.3269761403401694
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,64,1,128,1,float16,float16,0,1.7034239768981934
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,64,1,128,1,float16,fp8,0,1.6960852940877278
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,64,8,128,1,fp8,fp8,0,3.1571572621663413
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,64,1,128,1,fp8,fp8,0,1.6167252858479817
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,64,2,128,1,float16,float16,0,1.7024319966634114
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,64,64,128,1,float16,fp8,0,1.8718719482421875
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,64,64,128,1,float16,float16,0,1.8421707153320312
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,64,2,128,1,float16,fp8,0,1.7095680236816406
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,64,2,128,1,fp8,fp8,0,1.6170825958251953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,64,64,128,1,fp8,fp8,0,1.8020745913187664
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,64,4,128,1,float16,float16,0,1.7068427403767903
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,64,4,128,1,float16,fp8,0,1.7054719924926758
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,64,4,128,1,fp8,fp8,0,1.6313974062601726
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,64,8,128,1,float16,float16,0,1.7191306749979656
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,64,1,128,1,float16,float16,0,0.9038506348927816
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,64,8,128,1,float16,fp8,0,1.712981383005778
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,64,1,128,1,float16,fp8,0,0.9072693188985189
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,64,8,128,1,fp8,fp8,0,1.6368692715962727
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,64,1,128,1,fp8,fp8,0,0.861519972483317
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,64,2,128,1,float16,float16,0,0.9024853706359863
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,64,2,128,1,float16,fp8,0,0.9055786927541097
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,64,64,128,1,float16,float16,0,0.9654560089111328
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,64,64,128,1,float16,fp8,0,0.9958453178405762
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,64,64,128,1,fp8,fp8,0,0.9412266413370768
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,64,2,128,1,fp8,fp8,0,0.8605066935221354
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,64,4,128,1,float16,float16,0,0.9079519907633463
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,64,4,128,1,float16,fp8,0,0.9099786281585693
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,64,4,128,1,fp8,fp8,0,0.8632319768269857
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,64,8,128,1,float16,float16,0,0.9089813232421875
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,64,8,128,1,float16,fp8,0,0.9110079606374105
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,64,1,128,1,float16,fp8,0,0.5050186713536581
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,64,1,128,1,float16,float16,0,0.5053439935048422
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,64,8,128,1,fp8,fp8,0,0.876202662785848
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,64,64,128,1,float16,float16,0,0.5345280170440674
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,64,64,128,1,float16,fp8,0,0.5420373280843099
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,64,64,128,1,fp8,fp8,0,0.5210453271865845
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,64,1,128,1,fp8,fp8,0,0.47870934009552
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,64,2,128,1,float16,float16,0,0.503658652305603
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,64,2,128,1,float16,fp8,0,0.5060266653696696
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,64,2,128,1,fp8,fp8,0,0.48283199469248456
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,64,4,128,1,float16,fp8,0,0.5084160168965658
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,64,4,128,1,fp8,fp8,0,0.4838399887084961
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,64,4,128,1,float16,float16,0,0.5094399849573771
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,64,8,128,1,float16,float16,0,0.5098133484522501
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,64,8,128,1,fp8,fp8,0,0.4852000077565511
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,64,8,128,1,float16,fp8,0,0.5077333450317383
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,64,1,128,1,float16,float16,0,0.3002026677131653
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,64,1,128,1,float16,fp8,0,0.30293333530426025
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,64,64,128,1,float16,float16,0,0.3199999928474426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,64,1,128,1,fp8,fp8,0,0.29098665714263916
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,64,64,128,1,float16,fp8,0,0.3264906605084737
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,64,2,128,1,float16,float16,0,0.3015679915746053
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,64,2,128,1,float16,fp8,0,0.3012266755104065
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,64,64,128,1,fp8,fp8,0,0.31214932600657147
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,64,2,128,1,fp8,fp8,0,0.29201066493988037
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,64,4,128,1,float16,fp8,0,0.3036320010821025
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,64,8,128,1,float16,float16,0,0.30190932750701904
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,64,4,128,1,fp8,fp8,0,0.292687992254893
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,64,8,128,1,float16,fp8,0,0.30635199944178265
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,64,4,128,1,float16,float16,0,0.30293865998586017
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,64,8,128,1,fp8,fp8,0,0.2930346727371216
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,64,1,128,1,float16,fp8,0,7.508309046427409
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,64,1,128,1,float16,float16,0,7.5499521891276045
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,64,1,128,1,fp8,fp8,0,7.3695627848307295
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,64,2,128,1,float16,fp8,0,7.544138590494792
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,64,2,128,1,fp8,fp8,0,7.403008143107097
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,64,2,128,1,float16,float16,0,7.545520146687825
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,64,4,128,1,float16,fp8,0,7.504213333129883
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,64,4,128,1,float16,float16,0,7.602346420288086
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,64,1,128,1,float16,float16,0,3.7881174087524414
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,64,4,128,1,fp8,fp8,0,7.438159942626953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,64,8,128,1,float16,float16,0,7.5927785237630205
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,64,8,128,1,float16,fp8,0,7.561728159586589
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,64,1,128,1,float16,fp8,0,3.7836745580037436
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,64,8,128,1,fp8,fp8,0,7.509701410929362
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,64,64,128,1,float16,fp8,0,4.150608062744141
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,64,64,128,1,float16,float16,0,4.087471961975098
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,64,64,128,1,fp8,fp8,0,4.108970642089844
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,64,1,128,1,fp8,fp8,0,3.6869119008382163
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,64,2,128,1,float16,float16,0,3.747354825337728
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,64,2,128,1,float16,fp8,0,3.7442614237467446
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,64,4,128,1,float16,float16,0,3.7780373891194663
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,64,2,128,1,fp8,fp8,0,3.7275307973225913
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,64,4,128,1,float16,fp8,0,3.777189254760742
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,64,4,128,1,fp8,fp8,0,3.704319953918457
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,64,8,128,1,float16,fp8,0,3.796645482381185
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,64,8,128,1,float16,float16,0,3.770538647969564
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,64,1,128,1,float16,float16,0,1.9177759488423665
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,64,1,128,1,float16,fp8,0,1.907551924387614
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,64,8,128,1,fp8,fp8,0,3.7323039372762046
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,64,1,128,1,fp8,fp8,0,1.8790292739868164
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,64,2,128,1,float16,float16,0,1.9071946144104004
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,64,64,128,1,float16,float16,0,2.0825014114379883
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,64,64,128,1,float16,fp8,0,2.112506707509359
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,64,64,128,1,fp8,fp8,0,2.1135360399881997
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,64,2,128,1,float16,fp8,0,1.9184640248616536
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,64,2,128,1,fp8,fp8,0,1.8769973119099934
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,64,4,128,1,float16,float16,0,1.9228960673014324
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,64,4,128,1,float16,fp8,0,1.9229013125101726
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,64,4,128,1,fp8,fp8,0,1.886522610982259
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,64,8,128,1,float16,float16,0,1.9266613324483235
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,64,8,128,1,float16,fp8,0,1.9345119794209797
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,64,1,128,1,float16,float16,0,0.9965333143870035
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,64,8,128,1,fp8,fp8,0,1.9208426475524902
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,64,1,128,1,float16,fp8,0,0.9920852979024252
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,64,1,128,1,fp8,fp8,0,0.9736693700154623
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,64,64,128,1,float16,fp8,0,1.094490687052409
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,64,64,128,1,float16,float16,0,1.0818560123443604
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,64,64,128,1,fp8,fp8,0,1.0859466393788655
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,64,2,128,1,float16,float16,0,0.99617600440979
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,64,2,128,1,float16,fp8,0,0.9975519975026449
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,64,2,128,1,fp8,fp8,0,0.986624002456665
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,64,4,128,1,float16,float16,0,0.9992533524831136
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,64,4,128,1,float16,fp8,0,1.0016427040100098
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,64,4,128,1,fp8,fp8,0,0.9787733554840088
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,64,8,128,1,float16,float16,0,1.008128007253011
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,64,8,128,1,float16,fp8,0,1.0053973197937012
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,64,8,128,1,fp8,fp8,0,0.9890133539835612
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,64,1,128,1,float16,float16,0,0.5311199824015299
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,64,1,128,1,float16,fp8,0,0.530778686205546
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,64,64,128,1,float16,float16,0,0.572762648264567
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,64,1,128,1,fp8,fp8,0,0.5239466826121012
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,64,2,128,1,float16,float16,0,0.5314506689707438
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,64,64,128,1,float16,fp8,0,0.5901653369267782
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,64,2,128,1,float16,fp8,0,0.535210649172465
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,64,64,128,1,fp8,fp8,0,0.5761760075887045
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,64,4,128,1,float16,float16,0,0.5338506698608398
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,64,2,128,1,fp8,fp8,0,0.5234293142954508
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,64,4,128,1,float16,fp8,0,0.5382826725641886
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,64,4,128,1,fp8,fp8,0,0.5263306697209676
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,64,8,128,1,float16,float16,0,0.5355466604232788
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,64,1,128,1,float16,float16,0,0.29814932743708294
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,64,8,128,1,fp8,fp8,0,0.5314559936523438
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,64,8,128,1,float16,fp8,0,0.5437440077463785
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,64,1,128,1,float16,fp8,0,0.297818660736084
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,64,64,128,1,float16,float16,0,0.32238932450612384
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,64,1,128,1,fp8,fp8,0,0.29474133253097534
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,64,64,128,1,float16,fp8,0,0.3285386761029561
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,64,64,128,1,fp8,fp8,0,0.32340266307195026
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,64,2,128,1,float16,float16,0,0.29713600873947144
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,64,2,128,1,float16,fp8,0,0.29916266600290936
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,64,4,128,1,float16,float16,0,0.30053333441416424
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,64,2,128,1,fp8,fp8,0,0.29848533868789673
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,64,4,128,1,float16,fp8,0,0.3002026677131653
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,64,4,128,1,fp8,fp8,0,0.2984960079193115
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,64,8,128,1,float16,fp8,0,0.30190932750701904
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,64,1,128,1,float16,float16,0,0.18261333306630453
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,64,8,128,1,float16,float16,0,0.30292266607284546
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,64,8,128,1,fp8,fp8,0,0.2995199958483378
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,64,64,128,1,float16,float16,0,0.19780800739924112
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,64,64,128,1,float16,fp8,0,0.20155733823776245
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,64,1,128,1,float16,fp8,0,0.18158932526906332
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,64,1,128,1,fp8,fp8,0,0.1793866753578186
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,64,64,128,1,fp8,fp8,0,0.19694934288660684
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,64,2,128,1,float16,float16,0,0.18312533696492514
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,64,2,128,1,fp8,fp8,0,0.18076266845067343
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,64,4,128,1,float16,float16,0,0.18056533734003702
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,64,4,128,1,fp8,fp8,0,0.17920533816019693
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,64,4,128,1,float16,fp8,0,0.18414932489395142
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,64,2,128,1,float16,fp8,0,0.1834933360417684
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,64,8,128,1,float16,float16,0,0.18311999241511026
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,64,8,128,1,fp8,fp8,0,0.1795413295427958
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,64,8,128,1,float16,fp8,0,0.1858560045560201
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,64,1,128,1,float16,float16,0,5.198341369628906
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,64,1,128,1,float16,fp8,0,5.19869327545166
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,64,1,128,1,fp8,fp8,0,5.213695844014485
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,64,2,128,1,float16,float16,0,5.224623998006185
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,64,2,128,1,fp8,fp8,0,5.23143990834554
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,64,2,128,1,float16,fp8,0,5.194917360941569
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,64,4,128,1,float16,float16,0,5.2031145095825195
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,64,4,128,1,float16,fp8,0,5.265237490336101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,64,1,128,1,float16,float16,0,2.6340746879577637
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,64,4,128,1,fp8,fp8,0,5.2502241134643555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,64,1,128,1,float16,fp8,0,2.632362683614095
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,64,8,128,1,fp8,fp8,0,5.299370765686035
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,64,8,128,1,float16,float16,0,5.26421324412028
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,64,8,128,1,float16,fp8,0,5.246805191040039
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,64,64,128,1,float16,fp8,0,2.8963839213053384
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,64,64,128,1,float16,float16,0,2.8675254185994468
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,64,64,128,1,fp8,fp8,0,2.9364906946818032
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,64,1,128,1,fp8,fp8,0,2.6161492665608725
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,64,2,128,1,float16,fp8,0,2.619904041290283
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,64,2,128,1,fp8,fp8,0,2.626906712849935
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,64,2,128,1,float16,float16,0,2.642944018046061
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,64,4,128,1,float16,float16,0,2.6137653986612954
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,64,4,128,1,float16,fp8,0,2.6168373425801597
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,64,4,128,1,fp8,fp8,0,2.6364639600118003
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,64,8,128,1,float16,float16,0,2.6272427241007485
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,64,8,128,1,float16,fp8,0,2.6402133305867515
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,64,1,128,1,float16,float16,0,1.3335893948872883
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,64,8,128,1,fp8,fp8,0,2.6610347429911294
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,64,1,128,1,float16,fp8,0,1.3356320063273113
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,64,1,128,1,fp8,fp8,0,1.3370025952657063
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,64,2,128,1,float16,float16,0,1.3400746981302898
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,64,64,128,1,float16,float16,0,1.4648267428080242
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,64,64,128,1,float16,fp8,0,1.4981120427449544
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,64,64,128,1,fp8,fp8,0,1.5015254020690918
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,64,2,128,1,float16,fp8,0,1.3380212783813477
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,64,2,128,1,fp8,fp8,0,1.3444959322611492
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,64,4,128,1,float16,float16,0,1.3417867024739583
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,64,4,128,1,float16,fp8,0,1.3407573699951172
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,64,4,128,1,fp8,fp8,0,1.3492800394694011
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,64,8,128,1,float16,float16,0,1.3479199409484863
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,64,8,128,1,float16,fp8,0,1.365674654642741
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,64,8,128,1,fp8,fp8,0,1.3574825922648113
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,64,1,128,1,float16,float16,0,0.6973439852396647
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,64,1,128,1,float16,fp8,0,0.7014346917470297
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,64,1,128,1,fp8,fp8,0,0.7014453411102295
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,64,64,128,1,float16,float16,0,0.7611733277638754
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,64,64,128,1,float16,fp8,0,0.7799413204193115
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,64,2,128,1,float16,float16,0,0.7058719793955485
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,64,2,128,1,float16,fp8,0,0.6993920008341471
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,64,2,128,1,fp8,fp8,0,0.7004160086313883
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,64,4,128,1,float16,float16,0,0.7045066356658936
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,64,64,128,1,fp8,fp8,0,0.7840426762898763
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,64,4,128,1,float16,fp8,0,0.7051893075307211
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,64,4,128,1,fp8,fp8,0,0.7038293679555258
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,64,8,128,1,float16,float16,0,0.7072426478068033
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,64,8,128,1,float16,fp8,0,0.7144160270690918
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,64,8,128,1,fp8,fp8,0,0.7086079915364584
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,64,1,128,1,float16,float16,0,0.377679983774821
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,64,64,128,1,float16,float16,0,0.4089173475901286
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,64,1,128,1,float16,fp8,0,0.37699735164642334
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,64,1,128,1,fp8,fp8,0,0.3794026772181193
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,64,64,128,1,float16,fp8,0,0.4203840096791585
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,64,2,128,1,float16,float16,0,0.38040534655253094
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,64,2,128,1,float16,fp8,0,0.37836265563964844
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,64,64,128,1,fp8,fp8,0,0.4196693499883016
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,64,2,128,1,fp8,fp8,0,0.3834933439890544
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,64,4,128,1,float16,fp8,0,0.38076265652974445
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,64,4,128,1,float16,float16,0,0.3786880175272624
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,64,4,128,1,fp8,fp8,0,0.3824533224105835
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,64,8,128,1,float16,float16,0,0.3831413189570109
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,64,8,128,1,fp8,fp8,0,0.38416532675425213
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,64,8,128,1,float16,fp8,0,0.3858826557795207
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,64,1,128,1,float16,float16,0,0.21163199345270792
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,64,1,128,1,float16,fp8,0,0.2129866679509481
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,64,64,128,1,float16,float16,0,0.23518399397532144
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,64,1,128,1,fp8,fp8,0,0.21744000911712646
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,64,2,128,1,float16,float16,0,0.21503466367721558
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,64,64,128,1,float16,fp8,0,0.2416586677233378
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,64,64,128,1,fp8,fp8,0,0.24029332399368286
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,64,2,128,1,float16,fp8,0,0.21811199188232422
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,64,2,128,1,fp8,fp8,0,0.21742933988571167
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,64,4,128,1,float16,float16,0,0.2143519918123881
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,64,4,128,1,float16,fp8,0,0.2160266637802124
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,64,4,128,1,fp8,fp8,0,0.21913067499796549
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,64,8,128,1,float16,float16,0,0.21708800395329794
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,64,8,128,1,float16,fp8,0,0.21947733561197916
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,64,8,128,1,fp8,fp8,0,0.2218453288078308
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,64,64,128,1,float16,fp8,0,0.15052800377209982
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,64,64,128,1,float16,float16,0,0.1472586691379547
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,64,1,128,1,float16,float16,0,0.13806399703025818
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,64,1,128,1,float16,fp8,0,0.1365386644999186
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,64,64,128,1,fp8,fp8,0,0.15034133195877075
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,64,1,128,1,fp8,fp8,0,0.13704533378283182
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,64,2,128,1,float16,float16,0,0.1360213359196981
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,64,2,128,1,float16,fp8,0,0.1378933290640513
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,64,4,128,1,float16,float16,0,0.13806399703025818
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,64,4,128,1,float16,fp8,0,0.13942933082580566
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,64,8,128,1,float16,float16,0,0.13809067010879517
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,64,4,128,1,fp8,fp8,0,0.13876266280810037
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,64,8,128,1,float16,fp8,0,0.13755200306574503
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,64,8,128,1,fp8,fp8,0,0.13705066839853922
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,64,2,128,1,fp8,fp8,0,0.13821333646774292
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,64,1,128,1,float16,float16,0,5.782704035441081
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,64,1,128,1,float16,fp8,0,5.839701334635417
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,64,1,128,1,fp8,fp8,0,6.090239842732747
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,64,2,128,1,float16,float16,0,5.901994705200195
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,64,2,128,1,float16,fp8,0,5.867690404256185
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,64,2,128,1,fp8,fp8,0,6.259034474690755
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,64,4,128,1,float16,fp8,0,6.070271809895833
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,64,4,128,1,float16,float16,0,6.083242416381836
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,64,1,128,1,float16,float16,0,2.9083251953125
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,64,4,128,1,fp8,fp8,0,6.437034606933594
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,64,1,128,1,float16,fp8,0,2.9104159673055015
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,64,8,128,1,float16,fp8,0,6.1182295481363935
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,64,8,128,1,fp8,fp8,0,6.464682896931966
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,64,8,128,1,float16,float16,0,6.161061604817708
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,64,64,128,1,float16,float16,0,3.37664000193278
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,64,64,128,1,float16,fp8,0,3.301040013631185
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,64,64,128,1,fp8,fp8,0,3.510613441467285
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,64,1,128,1,fp8,fp8,0,3.0417919158935547
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,64,2,128,1,float16,float16,0,2.9230079650878906
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,64,2,128,1,float16,fp8,0,2.91105588277181
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,64,2,128,1,fp8,fp8,0,3.078319867451986
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,64,4,128,1,float16,float16,0,2.964821179707845
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,64,4,128,1,float16,fp8,0,2.969600041707357
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,64,4,128,1,fp8,fp8,0,3.2373812993367515
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,64,8,128,1,float16,float16,0,2.979498545328776
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,64,8,128,1,float16,fp8,0,3.0071519215901694
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,64,1,128,1,float16,float16,0,1.4658560752868652
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,64,1,128,1,float16,fp8,0,1.4644853274027507
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,64,8,128,1,fp8,fp8,0,3.247957229614258
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,64,1,128,1,fp8,fp8,0,1.5325867335001628
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,64,64,128,1,float16,float16,0,1.6848266919453938
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,64,2,128,1,float16,float16,0,1.467733383178711
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,64,64,128,1,float16,fp8,0,1.6611040433247883
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,64,2,128,1,float16,fp8,0,1.4680693944295247
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,64,2,128,1,fp8,fp8,0,1.5414667129516602
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,64,64,128,1,fp8,fp8,0,1.749498685201009
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,64,4,128,1,float16,float16,0,1.473541259765625
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,64,4,128,1,float16,fp8,0,1.4742186864217122
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,64,4,128,1,fp8,fp8,0,1.5993067423502605
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,64,8,128,1,float16,float16,0,1.4868693351745605
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,64,8,128,1,float16,fp8,0,1.4892373085021973
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,64,8,128,1,fp8,fp8,0,1.5853172938028972
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,64,1,128,1,float16,float16,0,0.7427413463592529
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,64,1,128,1,float16,fp8,0,0.7420586744944254
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,64,1,128,1,fp8,fp8,0,0.774837334950765
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,64,2,128,1,float16,float16,0,0.7478613058725992
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,64,64,128,1,float16,float16,0,0.8326826890309652
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,64,2,128,1,float16,fp8,0,0.7481973171234131
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,64,64,128,1,fp8,fp8,0,0.8710827032725016
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,64,64,128,1,float16,fp8,0,0.8224426905314127
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,64,2,128,1,fp8,fp8,0,0.779263973236084
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,64,4,128,1,float16,float16,0,0.7485439777374268
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,64,4,128,1,fp8,fp8,0,0.7869386672973633
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,64,4,128,1,float16,fp8,0,0.7475252946217855
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,64,8,128,1,float16,float16,0,0.7529760201772054
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,64,8,128,1,float16,fp8,0,0.7512746651967367
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,64,8,128,1,fp8,fp8,0,0.795471986134847
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,64,1,128,1,float16,float16,0,0.3858400185902913
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,64,1,128,1,float16,fp8,0,0.38281067212422687
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,64,64,128,1,float16,float16,0,0.4312746524810791
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,64,1,128,1,fp8,fp8,0,0.39798935254414874
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,64,64,128,1,float16,fp8,0,0.4203466574350993
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,64,2,128,1,float16,float16,0,0.3848533233006795
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,64,2,128,1,float16,fp8,0,0.38656000296274823
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,64,64,128,1,fp8,fp8,0,0.44697598616282147
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,64,2,128,1,fp8,fp8,0,0.39903998374938965
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,64,4,128,1,float16,float16,0,0.38656532764434814
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,64,4,128,1,float16,fp8,0,0.389296015103658
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,64,4,128,1,fp8,fp8,0,0.40140799681345624
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,64,8,128,1,float16,fp8,0,0.3882666826248169
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,64,8,128,1,float16,float16,0,0.38860801855723065
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,64,8,128,1,fp8,fp8,0,0.4065226713816325
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,64,1,128,1,float16,float16,0,0.20445332924524942
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,64,1,128,1,fp8,fp8,0,0.21026132504145303
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,64,1,128,1,float16,fp8,0,0.2039466698964437
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,64,64,128,1,float16,float16,0,0.2283573349316915
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,64,64,128,1,float16,fp8,0,0.22357332706451416
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,64,2,128,1,float16,float16,0,0.2034346659978231
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,64,2,128,1,float16,fp8,0,0.20377600193023682
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,64,64,128,1,fp8,fp8,0,0.23586134115854898
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,64,4,128,1,float16,float16,0,0.2032639980316162
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,64,2,128,1,fp8,fp8,0,0.2106026609738668
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,64,4,128,1,fp8,fp8,0,0.2105813423792521
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,64,4,128,1,float16,fp8,0,0.2058239976565043
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,64,8,128,1,float16,float16,0,0.20684800545374551
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,64,8,128,1,float16,fp8,0,0.2063360015551249
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,64,8,128,1,fp8,fp8,0,0.21506667137145996
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,64,64,128,1,float16,float16,0,0.1290079951286316
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,64,1,128,1,float16,float16,0,0.11162133018175761
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,64,1,128,1,fp8,fp8,0,0.11365866661071777
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,64,1,128,1,float16,fp8,0,0.11059733231862386
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,64,2,128,1,float16,float16,0,0.11194133758544922
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,64,64,128,1,float16,fp8,0,0.1256053348382314
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,64,64,128,1,fp8,fp8,0,0.13431466619173685
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,64,2,128,1,float16,fp8,0,0.11195733149846394
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,64,4,128,1,float16,float16,0,0.11196266611417134
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,64,2,128,1,fp8,fp8,0,0.1129813293615977
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,64,4,128,1,float16,fp8,0,0.11228799819946289
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,64,4,128,1,fp8,fp8,0,0.11400533715883891
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,64,8,128,1,float16,float16,0,0.1129866639773051
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,64,8,128,1,float16,fp8,0,0.11332266529401143
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,64,1,128,1,float16,float16,0,0.068271999557813
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,64,1,128,1,float16,fp8,0,0.0675786683956782
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,64,8,128,1,fp8,fp8,0,0.11742400129636128
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,64,64,128,1,float16,fp8,0,0.07134399811426799
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,64,64,128,1,float16,float16,0,0.07165866593519847
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,64,64,128,1,fp8,fp8,0,0.07612266639868419
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,64,2,128,1,float16,float16,0,0.0682666649421056
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,64,1,128,1,fp8,fp8,0,0.06758399804433186
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,64,2,128,1,float16,fp8,0,0.06758933266003926
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,64,2,128,1,fp8,fp8,0,0.06655466556549072
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,64,4,128,1,float16,fp8,0,0.06724800169467926
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,64,4,128,1,float16,float16,0,0.06930666665236156
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,64,4,128,1,fp8,fp8,0,0.06723733246326447
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,64,8,128,1,float16,float16,0,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,64,8,128,1,float16,fp8,0,0.06860266625881195
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,64,8,128,1,fp8,fp8,0,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,64,1,128,1,float16,float16,0,5.071877479553223
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,64,1,128,1,fp8,fp8,0,5.61186154683431
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,64,1,128,1,float16,fp8,0,5.050026575724284
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,64,2,128,1,fp8,fp8,0,5.640192031860352
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,64,2,128,1,float16,fp8,0,5.106165250142415
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,64,2,128,1,float16,float16,0,5.142383893330892
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,64,4,128,1,float16,float16,0,5.280255953470866
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,64,4,128,1,float16,fp8,0,5.309263865152995
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,64,1,128,1,float16,float16,0,2.521941343943278
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,64,4,128,1,fp8,fp8,0,5.821952184041341
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,64,1,128,1,float16,fp8,0,2.5236426989237466
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,64,8,128,1,float16,float16,0,5.343407948811849
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,64,8,128,1,float16,fp8,0,5.3567148844401045
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,64,8,128,1,fp8,fp8,0,5.7980531056722
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,64,64,128,1,fp8,fp8,0,3.1878932317097983
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,64,64,128,1,float16,float16,0,2.9972426096598306
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,64,64,128,1,float16,fp8,0,2.934783935546875
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,64,1,128,1,fp8,fp8,0,2.7451680501302085
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,64,2,128,1,float16,float16,0,2.528085390726725
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,64,2,128,1,fp8,fp8,0,2.7730026245117188
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,64,2,128,1,float16,fp8,0,2.5372907320658364
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,64,4,128,1,float16,float16,0,2.55403200785319
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,64,4,128,1,float16,fp8,0,2.59498659769694
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,64,4,128,1,fp8,fp8,0,2.8940000534057617
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,64,8,128,1,float16,float16,0,2.6076159477233887
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,64,8,128,1,float16,fp8,0,2.6368160247802734
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,64,1,128,1,float16,float16,0,1.27402663230896
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,64,1,128,1,float16,fp8,0,1.2757493654886882
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,64,8,128,1,fp8,fp8,0,2.928821245829264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,64,1,128,1,fp8,fp8,0,1.3725013732910156
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,64,64,128,1,float16,float16,0,1.4827466011047363
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,64,64,128,1,float16,fp8,0,1.4522026379903157
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,64,2,128,1,float16,float16,0,1.2798293431599934
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,64,64,128,1,fp8,fp8,0,1.5791680018107097
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,64,2,128,1,float16,fp8,0,1.27674667040507
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,64,2,128,1,fp8,fp8,0,1.3810292879740398
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,64,4,128,1,float16,float16,0,1.2842666308085124
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,64,4,128,1,float16,fp8,0,1.2876799901326497
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,64,4,128,1,fp8,fp8,0,1.4445279439290364
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,64,8,128,1,float16,fp8,0,1.2921226819356282
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,64,8,128,1,float16,float16,0,1.2958772977193196
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,64,1,128,1,float16,float16,0,0.6500746806462606
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,64,8,128,1,fp8,fp8,0,1.4508320490519206
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,64,1,128,1,float16,fp8,0,0.6490453481674194
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,64,1,128,1,fp8,fp8,0,0.6953279972076416
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,64,64,128,1,float16,float16,0,0.7338720162709554
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,64,2,128,1,float16,float16,0,0.6466506719589233
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,64,64,128,1,float16,fp8,0,0.7222452958424886
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,64,2,128,1,float16,fp8,0,0.652458667755127
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,64,64,128,1,fp8,fp8,0,0.793077309926351
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,64,2,128,1,fp8,fp8,0,0.6973439852396647
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,64,4,128,1,float16,float16,0,0.652453343073527
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,64,4,128,1,float16,fp8,0,0.6504053274790446
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,64,4,128,1,fp8,fp8,0,0.7031413714090983
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,64,8,128,1,float16,float16,0,0.6553653478622437
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,64,8,128,1,float16,fp8,0,0.6567253271738688
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,64,8,128,1,fp8,fp8,0,0.7113440036773682
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,64,1,128,1,float16,float16,0,0.33604268232981366
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,64,1,128,1,float16,fp8,0,0.3346506754557292
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,64,1,128,1,fp8,fp8,0,0.358570655186971
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,64,64,128,1,float16,float16,0,0.37938666343688965
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,64,64,128,1,float16,fp8,0,0.36982933680216473
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,64,64,128,1,fp8,fp8,0,0.40721599260965985
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,64,2,128,1,float16,float16,0,0.33638401826222736
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,64,2,128,1,float16,fp8,0,0.33536001046498615
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,64,2,128,1,fp8,fp8,0,0.35891199111938477
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,64,4,128,1,float16,float16,0,0.33636267979939777
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,64,4,128,1,float16,fp8,0,0.336736003557841
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,64,4,128,1,fp8,fp8,0,0.3599413235982259
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,64,8,128,1,float16,float16,0,0.33911999066670734
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,64,8,128,1,float16,fp8,0,0.3377386728922526
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,64,8,128,1,fp8,fp8,0,0.36471466223398846
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,64,1,128,1,float16,float16,0,0.1776640017827352
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,64,1,128,1,float16,fp8,0,0.1776640017827352
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,64,64,128,1,float16,float16,0,0.2020639975865682
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,64,1,128,1,fp8,fp8,0,0.18978132804234824
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,64,64,128,1,float16,fp8,0,0.19746132691701254
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,64,64,128,1,fp8,fp8,0,0.21504533290863037
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,64,2,128,1,float16,float16,0,0.17493333419164023
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,64,2,128,1,float16,fp8,0,0.17595199743906656
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,64,2,128,1,fp8,fp8,0,0.1884160041809082
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,64,4,128,1,float16,fp8,0,0.17919999361038208
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,64,4,128,1,float16,float16,0,0.17734400431315103
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,64,4,128,1,fp8,fp8,0,0.18995199600855509
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,64,8,128,1,float16,float16,0,0.1790293256441752
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,64,8,128,1,float16,fp8,0,0.1802240014076233
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,64,8,128,1,fp8,fp8,0,0.1930239995320638
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,64,1,128,1,float16,fp8,0,0.09762133161226909
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,64,1,128,1,float16,float16,0,0.09727999567985535
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,64,1,128,1,fp8,fp8,0,0.10205866893132527
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,64,2,128,1,float16,float16,0,0.09864532947540283
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,64,64,128,1,float16,float16,0,0.1129866639773051
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,64,2,128,1,float16,fp8,0,0.09659733374913533
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,64,64,128,1,float16,fp8,0,0.10990933577219646
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,64,64,128,1,fp8,fp8,0,0.12220266461372375
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,64,4,128,1,float16,fp8,0,0.0986400047938029
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,64,2,128,1,fp8,fp8,0,0.10274133086204529
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,64,4,128,1,fp8,fp8,0,0.10376532872517903
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,64,4,128,1,float16,float16,0,0.09762133161226909
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,64,8,128,1,float16,float16,0,0.09865066409111023
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,64,8,128,1,float16,fp8,0,0.09898666540781657
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,64,8,128,1,fp8,fp8,0,0.10377066334088643
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,64,1,128,1,float16,float16,0,0.05699733396371206
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,64,64,128,1,float16,float16,0,0.06144000093142191
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,64,1,128,1,float16,fp8,0,0.05665599803129832
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,64,1,128,1,fp8,fp8,0,0.05905066430568695
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,64,2,128,1,float16,fp8,0,0.057333335280418396
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,64,2,128,1,float16,float16,0,0.05836800237496694
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,64,64,128,1,float16,fp8,0,0.060415998101234436
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,64,64,128,1,fp8,fp8,0,0.06621333460013072
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,64,2,128,1,fp8,fp8,0,0.058703998724619545
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,64,4,128,1,float16,float16,0,0.057002668579419456
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,64,4,128,1,fp8,fp8,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,64,4,128,1,float16,fp8,0,0.057002668579419456
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,64,8,128,1,float16,float16,0,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,64,8,128,1,fp8,fp8,0,0.06006399790445963
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,64,64,128,1,float16,fp8,0,0.03719999889532725
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,64,8,128,1,float16,fp8,0,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,64,64,128,1,float16,float16,0,0.03549333413441976
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,64,64,128,1,fp8,fp8,0,0.03926933308442434
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,64,1,128,1,float16,float16,0,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,64,1,128,1,fp8,fp8,0,0.03616533428430557
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,64,2,128,1,float16,fp8,0,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,64,2,128,1,float16,float16,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,64,1,128,1,float16,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,64,2,128,1,fp8,fp8,0,0.03618133316437403
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,64,4,128,1,float16,float16,0,0.03548266738653183
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,64,4,128,1,float16,fp8,0,0.03517866631348928
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,64,4,128,1,fp8,fp8,0,0.036864000062147774
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,64,8,128,1,float16,float16,0,0.03549333413441976
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,64,8,128,1,float16,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,64,8,128,1,fp8,fp8,0,0.03686933219432831
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,64,1,128,1,float16,float16,0,1.8418292999267578
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,64,1,128,1,float16,fp8,0,1.8438827196757
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,64,1,128,1,fp8,fp8,0,1.977359930674235
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,64,2,128,1,fp8,fp8,0,2.0524214108784995
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,64,2,128,1,float16,float16,0,1.8630080223083496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,64,2,128,1,float16,fp8,0,1.855493386586507
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,64,4,128,1,float16,float16,0,1.8725600242614746
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,64,4,128,1,float16,fp8,0,1.8602666854858398
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,64,1,128,1,float16,float16,0,0.9350826740264893
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,64,4,128,1,fp8,fp8,0,2.1357226371765137
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,64,1,128,1,float16,fp8,0,0.9361120065053304
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,64,8,128,1,fp8,fp8,0,2.1708799997965493
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,64,8,128,1,float16,float16,0,1.9304107030232747
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,64,8,128,1,float16,fp8,0,1.9341600735982258
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,64,64,128,1,fp8,fp8,0,1.2151626745859783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,64,64,128,1,float16,float16,0,1.13100798924764
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,64,64,128,1,float16,fp8,0,1.1084799766540527
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,64,1,128,1,fp8,fp8,0,1.0013226668039958
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,64,2,128,1,float16,float16,0,0.9381600220998129
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,64,2,128,1,float16,fp8,0,0.9371519883473715
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,64,2,128,1,fp8,fp8,0,1.0084693431854248
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,64,4,128,1,float16,float16,0,0.9402080376942953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,64,4,128,1,float16,fp8,0,0.9439199765523275
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,64,4,128,1,fp8,fp8,0,1.032362699508667
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,64,8,128,1,float16,float16,0,0.9538613160451254
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,64,8,128,1,float16,fp8,0,0.9494187037150065
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,64,8,128,1,fp8,fp8,0,1.0371413230895996
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,64,1,128,1,float16,float16,0,0.48179201285044354
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,64,1,128,1,float16,fp8,0,0.48110934098561603
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,64,1,128,1,fp8,fp8,0,0.5101120074590048
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,64,64,128,1,float16,float16,0,0.5723893245061239
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,64,64,128,1,float16,fp8,0,0.558079997698466
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,64,2,128,1,float16,float16,0,0.48179201285044354
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,64,64,128,1,fp8,fp8,0,0.6103093226750692
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,64,2,128,1,float16,fp8,0,0.4790666500727336
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,64,2,128,1,fp8,fp8,0,0.5128533442815145
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,64,4,128,1,float16,float16,0,0.4834933280944824
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,64,4,128,1,float16,fp8,0,0.4838346640268962
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,64,4,128,1,fp8,fp8,0,0.5179733435312907
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,64,8,128,1,float16,float16,0,0.48931201299031574
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,64,8,128,1,float16,fp8,0,0.4882773160934448
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,64,8,128,1,fp8,fp8,0,0.5253066619237264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,64,1,128,1,float16,float16,0,0.2532693346341451
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,64,1,128,1,float16,fp8,0,0.25464000304539997
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,64,64,128,1,float16,float16,0,0.2984960079193115
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,64,64,128,1,float16,fp8,0,0.2940640052159627
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,64,1,128,1,fp8,fp8,0,0.26470933357874554
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,64,2,128,1,float16,float16,0,0.2525920073191325
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,64,2,128,1,float16,fp8,0,0.25088000297546387
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,64,2,128,1,fp8,fp8,0,0.26470400889714557
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,64,64,128,1,fp8,fp8,0,0.31590400139490765
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,64,4,128,1,float16,float16,0,0.2543039917945862
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,64,4,128,1,float16,fp8,0,0.2515626748402913
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,64,4,128,1,fp8,fp8,0,0.26743467648824054
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,64,8,128,1,float16,float16,0,0.2577173312505086
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,64,8,128,1,float16,fp8,0,0.258735994497935
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,64,8,128,1,fp8,fp8,0,0.2725546757380168
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,64,1,128,1,float16,float16,0,0.13380266229311624
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,64,64,128,1,float16,float16,0,0.16247999668121338
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,64,64,128,1,float16,fp8,0,0.15633066495259604
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,64,64,128,1,fp8,fp8,0,0.16691199938456217
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,64,1,128,1,float16,fp8,0,0.13618666927019754
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,64,1,128,1,fp8,fp8,0,0.14148267110188803
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,64,2,128,1,float16,float16,0,0.13758400082588196
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,64,2,128,1,fp8,fp8,0,0.1437013347943624
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,64,2,128,1,float16,fp8,0,0.13414399822553
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,64,4,128,1,fp8,fp8,0,0.1421173314253489
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,64,4,128,1,float16,float16,0,0.13823999961217245
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,64,4,128,1,float16,fp8,0,0.13688000043233237
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,64,8,128,1,float16,float16,0,0.13875200351079306
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,64,8,128,1,float16,fp8,0,0.1402773360411326
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,64,8,128,1,fp8,fp8,0,0.14727466305096945
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,64,1,128,1,fp8,fp8,0,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,64,1,128,1,float16,float16,0,0.077824001510938
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,64,1,128,1,float16,fp8,0,0.07717333237330119
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,64,64,128,1,float16,float16,0,0.09284266829490662
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,64,64,128,1,float16,fp8,0,0.09013332923253377
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,64,64,128,1,fp8,fp8,0,0.09762133161226909
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,64,2,128,1,float16,float16,0,0.07678399980068207
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,64,2,128,1,float16,fp8,0,0.07611733178297679
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,64,2,128,1,fp8,fp8,0,0.07850666840871175
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,64,4,128,1,float16,float16,0,0.07818666597207387
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,64,4,128,1,float16,fp8,0,0.07679999868075053
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,64,4,128,1,fp8,fp8,0,0.08020799855391185
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,64,8,128,1,float16,float16,0,0.07611200213432312
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,64,8,128,1,float16,fp8,0,0.07814399898052216
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,64,8,128,1,fp8,fp8,0,0.08123733103275299
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,64,1,128,1,float16,float16,0,0.04574400186538696
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,64,1,128,1,float16,fp8,0,0.047450666626294456
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,64,64,128,1,float16,float16,0,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,64,64,128,1,float16,fp8,0,0.05187733471393585
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,64,2,128,1,float16,float16,0,0.047775998711586
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,64,64,128,1,fp8,fp8,0,0.0566293348868688
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,64,1,128,1,fp8,fp8,0,0.04642133414745331
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,64,2,128,1,float16,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,64,2,128,1,fp8,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,64,4,128,1,float16,float16,0,0.04576000074545542
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,64,4,128,1,fp8,fp8,0,0.047770669062932335
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,64,8,128,1,float16,float16,0,0.0481333335240682
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,64,4,128,1,float16,fp8,0,0.04741866886615753
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,64,8,128,1,float16,fp8,0,0.048469334840774536
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,64,8,128,1,fp8,fp8,0,0.047456001242001854
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,64,64,128,1,float16,fp8,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,64,1,128,1,float16,float16,0,0.030042665700117748
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,64,1,128,1,float16,fp8,0,0.02934933453798294
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,64,64,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,64,64,128,1,float16,float16,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,64,1,128,1,fp8,fp8,0,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,64,2,128,1,float16,float16,0,0.029359998802344005
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,64,2,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,64,2,128,1,fp8,fp8,0,0.03107733279466629
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,64,4,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,64,4,128,1,float16,fp8,0,0.029370665550231934
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,64,4,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,64,8,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,64,8,128,1,float16,fp8,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,64,1,128,1,float16,float16,0,0.02457600086927414
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,64,64,128,1,float16,fp8,0,0.025589334468046825
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,64,1,128,1,float16,fp8,0,0.023893333971500397
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,64,8,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,64,64,128,1,fp8,fp8,0,0.025616000096003216
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,64,64,128,1,float16,float16,0,0.025605333348115284
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,64,1,128,1,fp8,fp8,0,0.024570666253566742
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,64,2,128,1,float16,fp8,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,64,2,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,64,4,128,1,float16,float16,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,64,2,128,1,float16,float16,0,0.0249493345618248
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,64,4,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,64,4,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,64,8,128,1,float16,float16,0,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,64,8,128,1,fp8,fp8,0,0.025237334271272022
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,64,8,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,64,1,128,1,float16,fp8,0,1.0019839604695637
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,64,1,128,1,float16,float16,0,1.0033493041992188
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,64,1,128,1,fp8,fp8,0,1.040218671162923
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,64,2,128,1,float16,float16,0,1.0036853154500325
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,64,2,128,1,float16,fp8,0,1.0063947041829426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,64,2,128,1,fp8,fp8,0,1.0511360168457031
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,64,4,128,1,float16,float16,0,1.0180319945017497
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,64,4,128,1,float16,fp8,0,1.0118772983551025
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,64,1,128,1,float16,float16,0,0.5111466646194458
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,64,4,128,1,fp8,fp8,0,1.12009064356486
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,64,8,128,1,fp8,fp8,0,1.0972106456756592
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,64,8,128,1,float16,float16,0,1.026911973953247
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,64,8,128,1,float16,fp8,0,1.0207573572794597
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,64,1,128,1,float16,fp8,0,0.5101226568222046
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,64,64,128,1,float16,float16,0,0.6055200099945068
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,64,64,128,1,fp8,fp8,0,0.6323200066884359
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,64,64,128,1,float16,fp8,0,0.5860693454742432
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,64,1,128,1,fp8,fp8,0,0.5307573477427164
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,64,2,128,1,float16,float16,0,0.5077279806137085
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,64,2,128,1,float16,fp8,0,0.508410652478536
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,64,2,128,1,fp8,fp8,0,0.5341493288675944
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,64,4,128,1,float16,float16,0,0.5131893157958984
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,64,4,128,1,fp8,fp8,0,0.5369120041529337
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,64,4,128,1,float16,fp8,0,0.5128586689631144
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,64,8,128,1,float16,float16,0,0.516266663869222
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,64,8,128,1,float16,fp8,0,0.5172906716664633
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,64,8,128,1,fp8,fp8,0,0.5474986632664999
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,64,1,128,1,float16,float16,0,0.2633333404858907
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,64,1,128,1,float16,fp8,0,0.2653866608937581
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,64,1,128,1,fp8,fp8,0,0.27323732773462933
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,64,2,128,1,float16,float16,0,0.2653866608937581
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,64,64,128,1,float16,float16,0,0.31351999441782635
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,64,64,128,1,float16,fp8,0,0.30737600723902386
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,64,64,128,1,fp8,fp8,0,0.3288853367169698
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,64,2,128,1,float16,fp8,0,0.2633333404858907
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,64,2,128,1,fp8,fp8,0,0.27562665939331055
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,64,4,128,1,float16,float16,0,0.26606400807698566
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,64,4,128,1,float16,fp8,0,0.26368000109990436
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,64,4,128,1,fp8,fp8,0,0.27767467498779297
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,64,8,128,1,float16,float16,0,0.26982933282852173
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,64,8,128,1,float16,fp8,0,0.2701653242111206
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,64,8,128,1,fp8,fp8,0,0.283135990301768
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,64,1,128,1,float16,fp8,0,0.13960533340771994
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,64,1,128,1,float16,float16,0,0.14216533303260803
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,64,64,128,1,float16,float16,0,0.16538133223851523
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,64,64,128,1,float16,fp8,0,0.16452800234158835
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,64,1,128,1,fp8,fp8,0,0.14728533228238425
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,64,64,128,1,fp8,fp8,0,0.17425066232681274
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,64,2,128,1,float16,float16,0,0.1418239971001943
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,64,2,128,1,float16,fp8,0,0.14079999923706055
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,64,2,128,1,fp8,fp8,0,0.14728533228238425
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,64,4,128,1,float16,float16,0,0.14353066682815552
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,64,4,128,1,float16,fp8,0,0.14114666978518167
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,64,4,128,1,fp8,fp8,0,0.14813866217931113
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,64,8,128,1,float16,float16,0,0.14251200358072916
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,64,8,128,1,float16,fp8,0,0.14506133397420248
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,64,1,128,1,float16,float16,0,0.07850666840871175
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,64,8,128,1,fp8,fp8,0,0.1520906686782837
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,64,1,128,1,float16,fp8,0,0.07679999868075053
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,64,64,128,1,float16,float16,0,0.09213333328564961
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,64,64,128,1,float16,fp8,0,0.0897653301556905
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,64,64,128,1,fp8,fp8,0,0.09966400265693665
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,64,1,128,1,fp8,fp8,0,0.08058133224646251
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,64,2,128,1,float16,float16,0,0.07851199805736542
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,64,2,128,1,float16,fp8,0,0.07850666840871175
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,64,4,128,1,float16,float16,0,0.07850666840871175
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,64,2,128,1,fp8,fp8,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,64,4,128,1,fp8,fp8,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,64,4,128,1,float16,fp8,0,0.07850133379300435
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,64,8,128,1,float16,float16,0,0.0784800002972285
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,64,1,128,1,float16,float16,0,0.04779199759165446
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,64,8,128,1,float16,fp8,0,0.07884799937407176
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,64,8,128,1,fp8,fp8,0,0.08226666847864787
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,64,64,128,1,float16,float16,0,0.0529013325770696
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,64,1,128,1,float16,fp8,0,0.047466665506362915
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,64,64,128,1,float16,fp8,0,0.05154666801293691
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,64,64,128,1,fp8,fp8,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,64,1,128,1,fp8,fp8,0,0.04948266843954722
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,64,2,128,1,float16,float16,0,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,64,2,128,1,float16,fp8,0,0.04846400022506714
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,64,4,128,1,float16,float16,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,64,4,128,1,fp8,fp8,0,0.04781333108743032
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,64,2,128,1,fp8,fp8,0,0.047781333327293396
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,64,4,128,1,float16,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,64,8,128,1,float16,float16,0,0.04780800143877665
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,64,8,128,1,float16,fp8,0,0.048469334840774536
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,64,8,128,1,fp8,fp8,0,0.04982399940490723
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,64,1,128,1,float16,float16,0,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,64,64,128,1,fp8,fp8,0,0.03512533257404963
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,64,1,128,1,float16,fp8,0,0.03108799954255422
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,64,64,128,1,float16,fp8,0,0.03242666771014532
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,64,64,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,64,2,128,1,float16,fp8,0,0.03140799949566523
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,64,2,128,1,float16,float16,0,0.03107733279466629
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,64,1,128,1,fp8,fp8,0,0.03276800115903219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,64,4,128,1,float16,float16,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,64,2,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,64,4,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,64,8,128,1,float16,fp8,0,0.031744000812371574
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,64,8,128,1,fp8,fp8,0,0.03140799949566523
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,64,4,128,1,float16,fp8,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,64,8,128,1,float16,float16,0,0.03143466760714849
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,64,64,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,64,64,128,1,float16,float16,0,0.022842665513356526
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,64,1,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,64,1,128,1,float16,float16,0,0.02253866692384084
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,64,1,128,1,fp8,fp8,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,64,64,128,1,fp8,fp8,0,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,64,2,128,1,float16,float16,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,64,2,128,1,fp8,fp8,0,0.022848000129063923
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,64,4,128,1,float16,fp8,0,0.021157334248224895
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,64,4,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,64,8,128,1,float16,float16,0,0.021168000996112823
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,64,2,128,1,float16,fp8,0,0.022181332111358643
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,64,4,128,1,float16,float16,0,0.021146667500336964
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,64,8,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,64,64,128,1,float16,float16,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,64,64,128,1,float16,fp8,0,0.019088000059127808
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,64,1,128,1,float16,float16,0,0.018432000031073887
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,64,8,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,64,64,128,1,fp8,fp8,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,64,1,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,64,2,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,64,2,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,64,2,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,64,4,128,1,float16,float16,0,0.018789333601792652
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,64,1,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,64,4,128,1,fp8,fp8,0,0.01876266673207283
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,64,8,128,1,float16,float16,0,0.018432000031073887
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,64,8,128,1,float16,fp8,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,64,4,128,1,float16,fp8,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,64,8,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,64,1,128,1,float16,float16,0,0.680618683497111
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,64,1,128,1,float16,fp8,0,0.6761813163757324
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,64,1,128,1,fp8,fp8,0,0.7587839762369791
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,64,2,128,1,float16,float16,0,0.6799413363138834
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,64,2,128,1,fp8,fp8,0,0.7536693414052328
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,64,2,128,1,float16,fp8,0,0.6788907051086426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,64,4,128,1,float16,float16,0,0.6823200384775797
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,64,4,128,1,float16,fp8,0,0.6850773493448893
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,64,1,128,1,float16,float16,0,0.34867199261983234
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,64,4,128,1,fp8,fp8,0,0.7611733277638754
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,64,8,128,1,float16,float16,0,0.6871039867401123
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,64,8,128,1,fp8,fp8,0,0.7662879625956217
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,64,8,128,1,float16,fp8,0,0.6912000179290771
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,64,1,128,1,float16,fp8,0,0.3510613441467285
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,64,64,128,1,float16,float16,0,0.3952639897664388
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,64,64,128,1,float16,fp8,0,0.3906559944152832
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,64,64,128,1,fp8,fp8,0,0.43536531925201416
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,64,1,128,1,fp8,fp8,0,0.38281067212422687
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,64,2,128,1,float16,fp8,0,0.34833065668741864
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,64,2,128,1,float16,float16,0,0.34935998916625977
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,64,2,128,1,fp8,fp8,0,0.3862239917119344
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,64,4,128,1,float16,fp8,0,0.34935466448465985
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,64,4,128,1,float16,float16,0,0.3524266481399536
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,64,4,128,1,fp8,fp8,0,0.38656000296274823
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,64,8,128,1,float16,float16,0,0.35140268007914227
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,64,8,128,1,float16,fp8,0,0.35075199604034424
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,64,8,128,1,fp8,fp8,0,0.3892853260040283
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,64,1,128,1,float16,float16,0,0.18346667289733887
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,64,64,128,1,float16,float16,0,0.20889600118001303
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,64,1,128,1,float16,fp8,0,0.18501333395640054
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,64,64,128,1,float16,fp8,0,0.20480533440907797
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,64,2,128,1,float16,float16,0,0.18483734130859375
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,64,1,128,1,fp8,fp8,0,0.20070399840672812
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,64,64,128,1,fp8,fp8,0,0.22664000590642294
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,64,2,128,1,float16,fp8,0,0.18397865692774454
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,64,2,128,1,fp8,fp8,0,0.2008799910545349
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,64,4,128,1,float16,float16,0,0.18466132879257202
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,64,4,128,1,float16,fp8,0,0.18447999159495035
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,64,4,128,1,fp8,fp8,0,0.2032639980316162
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,64,8,128,1,float16,float16,0,0.18754132588704428
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,64,8,128,1,float16,fp8,0,0.18517865737279257
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,64,8,128,1,fp8,fp8,0,0.20428800582885742
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,64,64,128,1,float16,float16,0,0.11332266529401143
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,64,1,128,1,float16,float16,0,0.09866666793823242
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,64,2,128,1,float16,float16,0,0.09865066409111023
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,64,1,128,1,float16,fp8,0,0.09966933727264404
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,64,64,128,1,float16,fp8,0,0.1129813293615977
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,64,2,128,1,float16,fp8,0,0.0993226667245229
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,64,64,128,1,fp8,fp8,0,0.12663466731707254
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,64,1,128,1,fp8,fp8,0,0.106495996316274
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,64,2,128,1,fp8,fp8,0,0.10547733306884766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,64,4,128,1,float16,float16,0,0.09865066409111023
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,64,4,128,1,float16,fp8,0,0.09966933727264404
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,64,4,128,1,fp8,fp8,0,0.10717333356539409
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,64,8,128,1,float16,fp8,0,0.10102933645248413
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,64,8,128,1,float16,float16,0,0.10000532865524292
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,64,8,128,1,fp8,fp8,0,0.10922666390736897
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,64,64,128,1,float16,float16,0,0.06348266700903575
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,64,64,128,1,float16,fp8,0,0.06213866670926412
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,64,1,128,1,fp8,fp8,0,0.062128002444903054
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,64,1,128,1,float16,float16,0,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,64,1,128,1,float16,fp8,0,0.0580320010582606
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,64,2,128,1,float16,fp8,0,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,64,64,128,1,fp8,fp8,0,0.06963199873765309
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,64,2,128,1,float16,float16,0,0.05871466795603434
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,64,2,128,1,fp8,fp8,0,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,64,4,128,1,float16,fp8,0,0.05972800155480703
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,64,4,128,1,float16,float16,0,0.0580320010582606
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,64,4,128,1,fp8,fp8,0,0.062128002444903054
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,64,8,128,1,float16,fp8,0,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,64,8,128,1,float16,float16,0,0.0580213318268458
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,64,64,128,1,float16,fp8,0,0.038912000755469
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,64,8,128,1,fp8,fp8,0,0.0631466656923294
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,64,64,128,1,float16,float16,0,0.039274667700131737
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,64,1,128,1,float16,float16,0,0.03755199909210205
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,64,64,128,1,fp8,fp8,0,0.04095999896526337
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,64,2,128,1,float16,fp8,0,0.03721066564321518
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,64,1,128,1,float16,fp8,0,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,64,1,128,1,fp8,fp8,0,0.038912000755469
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,64,4,128,1,float16,float16,0,0.03755199909210205
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,64,2,128,1,fp8,fp8,0,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,64,2,128,1,float16,float16,0,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,64,4,128,1,float16,fp8,0,0.03789333254098892
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,64,4,128,1,fp8,fp8,0,0.039247999588648476
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,64,8,128,1,float16,float16,0,0.03719466676314672
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,64,64,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,64,8,128,1,fp8,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,64,8,128,1,float16,fp8,0,0.03823466598987579
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,64,64,128,1,float16,fp8,0,0.025253333151340485
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,64,1,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,64,1,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,64,1,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,64,2,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,64,4,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,64,2,128,1,float16,fp8,0,0.025253333151340485
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,64,4,128,1,fp8,fp8,0,0.025263999899228413
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,64,2,128,1,fp8,fp8,0,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,64,4,128,1,float16,fp8,0,0.024890666206677754
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,64,64,128,1,fp8,fp8,0,0.0272533322374026
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,64,8,128,1,float16,float16,0,0.025946666797002155
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,64,8,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,64,8,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,64,64,128,1,float16,fp8,0,0.019120000302791595
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,64,64,128,1,float16,float16,0,0.01947733387351036
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,64,1,128,1,float16,float16,0,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,64,64,128,1,fp8,fp8,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,64,1,128,1,fp8,fp8,0,0.01876266673207283
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,64,2,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,64,2,128,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,64,2,128,1,float16,float16,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,64,4,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,64,4,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,64,4,128,1,fp8,fp8,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,64,8,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,64,1,128,1,float16,fp8,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,64,8,128,1,fp8,fp8,0,0.018800000349680584
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,64,8,128,1,float16,fp8,0,0.01878400022784869
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,64,64,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,64,64,128,1,float16,float16,0,0.01670933390657107
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,64,64,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,64,1,128,1,float16,float16,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,64,1,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,64,2,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,64,2,128,1,float16,fp8,0,0.018432000031073887
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,64,2,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,64,4,128,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,64,4,128,1,float16,float16,0,0.017071999609470367
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,64,4,128,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,64,8,128,1,float16,float16,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,64,8,128,1,float16,fp8,0,0.017749333133300144
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,64,8,128,1,fp8,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,64,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,64,1,128,1,float16,float16,0,0.5379466613133749
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,64,1,128,1,fp8,fp8,0,0.605183998743693
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,64,1,128,1,float16,fp8,0,0.5433919827143351
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,64,2,128,1,float16,float16,0,0.5406719843546549
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,64,2,128,1,float16,fp8,0,0.5444106658299764
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,64,2,128,1,fp8,fp8,0,0.6062026818593343
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,64,4,128,1,float16,float16,0,0.5410133202870687
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,64,4,128,1,float16,fp8,0,0.5488640069961548
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,64,4,128,1,fp8,fp8,0,0.6085973183314005
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,64,8,128,1,float16,float16,0,0.5468213160832723
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,64,1,128,1,float16,float16,0,0.2821173270543416
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,64,8,128,1,fp8,fp8,0,0.6126933495203654
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,64,8,128,1,float16,fp8,0,0.5468213160832723
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,64,64,128,1,float16,fp8,0,0.3012320001920064
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,64,64,128,1,float16,float16,0,0.30668799082438153
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,64,64,128,1,fp8,fp8,0,0.3394560019175212
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,64,1,128,1,float16,fp8,0,0.27804799874623615
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,64,1,128,1,fp8,fp8,0,0.3114666740099589
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,64,2,128,1,float16,fp8,0,0.2783626715342204
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,64,2,128,1,fp8,fp8,0,0.3107840021451314
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,64,4,128,1,float16,float16,0,0.2797173261642456
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,64,4,128,1,float16,fp8,0,0.28074665864308673
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,64,4,128,1,fp8,fp8,0,0.31385600566864014
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,64,2,128,1,float16,float16,0,0.2831626733144124
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,64,1,128,1,float16,float16,0,0.14761599898338318
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,64,8,128,1,float16,float16,0,0.28245866298675537
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,64,8,128,1,float16,fp8,0,0.28313066562016803
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,64,8,128,1,fp8,fp8,0,0.3165653347969055
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,64,1,128,1,float16,fp8,0,0.14899733662605286
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,64,64,128,1,float16,fp8,0,0.15991999705632529
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,64,64,128,1,float16,float16,0,0.16025599837303162
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,64,64,128,1,fp8,fp8,0,0.18161600828170776
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,64,1,128,1,fp8,fp8,0,0.16196266810099283
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,64,2,128,1,float16,float16,0,0.14967466394106546
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,64,2,128,1,float16,fp8,0,0.14813866217931113
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,64,2,128,1,fp8,fp8,0,0.16264533003171286
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,64,4,128,1,float16,fp8,0,0.1469386617342631
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,64,4,128,1,fp8,fp8,0,0.16230400403340658
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,64,8,128,1,float16,float16,0,0.14797332882881165
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,64,8,128,1,float16,fp8,0,0.14967999855677286
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,64,4,128,1,float16,float16,0,0.15001599987347922
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,64,8,128,1,fp8,fp8,0,0.16435199975967407
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,64,1,128,1,float16,float16,0,0.08258666594823201
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,64,1,128,1,float16,fp8,0,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,64,64,128,1,float16,fp8,0,0.08772266904513042
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,64,64,128,1,float16,float16,0,0.08874666690826416
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,64,1,128,1,fp8,fp8,0,0.08874666690826416
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,64,64,128,1,fp8,fp8,0,0.09896533687909444
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,64,2,128,1,float16,fp8,0,0.08260266482830048
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,64,2,128,1,fp8,fp8,0,0.0890826682249705
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,64,2,128,1,float16,float16,0,0.0846453309059143
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,64,4,128,1,float16,float16,0,0.08430932958920796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,64,8,128,1,float16,float16,0,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,64,4,128,1,float16,fp8,0,0.08294400076071422
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,64,4,128,1,fp8,fp8,0,0.08942400415738423
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,64,8,128,1,fp8,fp8,0,0.08942932883898418
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,64,8,128,1,float16,fp8,0,0.08260266482830048
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,64,64,128,1,float16,float16,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,64,1,128,1,float16,float16,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,64,64,128,1,fp8,fp8,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,64,2,128,1,float16,float16,0,0.04948266843954722
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,64,2,128,1,float16,fp8,0,0.05017599960168203
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,64,1,128,1,float16,fp8,0,0.05014933149019877
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,64,1,128,1,fp8,fp8,0,0.0532533327738444
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,64,2,128,1,fp8,fp8,0,0.05359466870625814
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,64,64,128,1,float16,fp8,0,0.05120000243186951
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,64,4,128,1,float16,float16,0,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,64,8,128,1,float16,float16,0,0.05120000243186951
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,64,4,128,1,float16,fp8,0,0.049829334020614624
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,64,4,128,1,fp8,fp8,0,0.05427733560403188
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,64,8,128,1,fp8,fp8,0,0.053583999474843345
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,64,8,128,1,float16,fp8,0,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,64,1,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,64,64,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,64,64,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,64,1,128,1,float16,fp8,0,0.03276800115903219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,64,1,128,1,fp8,fp8,0,0.03345600018898646
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,64,64,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,64,2,128,1,fp8,fp8,0,0.03379199902216593
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,64,4,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,64,2,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,64,4,128,1,float16,fp8,0,0.03139200061559677
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,64,4,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,64,8,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,64,2,128,1,float16,fp8,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,64,8,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,64,8,128,1,float16,fp8,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,64,1,128,1,float16,fp8,0,0.021498667697111767
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,64,64,128,1,fp8,fp8,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,64,64,128,1,float16,float16,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,64,64,128,1,float16,fp8,0,0.022842665513356526
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,64,1,128,1,float16,float16,0,0.021141332884629566
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,64,1,128,1,fp8,fp8,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,64,2,128,1,float16,float16,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,64,2,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,64,4,128,1,float16,float16,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,64,2,128,1,float16,fp8,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,64,4,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,64,8,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,64,4,128,1,float16,fp8,0,0.022858666876951855
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,64,8,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,64,8,128,1,float16,fp8,0,0.022511998812357586
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,64,64,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,64,64,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,64,1,128,1,float16,fp8,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,64,1,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,64,1,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,64,2,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,64,64,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,64,2,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,64,4,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,64,2,128,1,fp8,fp8,0,0.018789333601792652
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,64,4,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,64,4,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,64,8,128,1,float16,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,64,8,128,1,float16,float16,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,64,8,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,64,64,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,64,64,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,64,1,128,1,float16,float16,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,64,1,128,1,fp8,fp8,0,0.016778666526079178
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,64,2,128,1,float16,float16,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,64,64,128,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,64,1,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,64,4,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,64,4,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,64,4,128,1,fp8,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,64,2,128,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,64,2,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,64,8,128,1,float16,float16,0,0.016714667280515034
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,64,8,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,64,8,128,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,64,1,128,1,float16,float16,0,0.47325865427652997
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,64,1,128,1,float16,fp8,0,0.4766720136006673
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,64,1,128,1,fp8,fp8,0,0.5317973295847574
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,64,2,128,1,float16,float16,0,0.4742826620737712
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,64,2,128,1,float16,fp8,0,0.47974932193756104
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,64,2,128,1,fp8,fp8,0,0.532480001449585
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,64,4,128,1,float16,float16,0,0.47359999020894367
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,64,4,128,1,float16,fp8,0,0.47362132867177326
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,64,1,128,1,float16,float16,0,0.24542399247487387
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,64,4,128,1,fp8,fp8,0,0.5338506698608398
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,64,8,128,1,float16,float16,0,0.4780319929122925
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,64,8,128,1,float16,fp8,0,0.4780319929122925
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,64,8,128,1,fp8,fp8,0,0.5369226535161337
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,64,64,128,1,float16,float16,0,0.262992004553477
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,64,64,128,1,float16,fp8,0,0.2590720057487488
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,64,64,128,1,fp8,fp8,0,0.2940640052159627
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,64,1,128,1,fp8,fp8,0,0.2728959918022156
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,64,1,128,1,float16,fp8,0,0.24473599592844644
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,64,2,128,1,float16,float16,0,0.24438399076461792
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,64,2,128,1,float16,fp8,0,0.24644800027211508
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,64,2,128,1,fp8,fp8,0,0.27187200387318927
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,64,4,128,1,float16,float16,0,0.24952000379562378
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,64,4,128,1,float16,fp8,0,0.24644267559051514
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,64,4,128,1,fp8,fp8,0,0.27527467409769696
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,64,8,128,1,float16,float16,0,0.24644267559051514
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,64,8,128,1,float16,fp8,0,0.24780799945195517
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,64,8,128,1,fp8,fp8,0,0.27529066801071167
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,64,64,128,1,float16,float16,0,0.13756266236305237
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,64,1,128,1,fp8,fp8,0,0.1437066694100698
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,64,1,128,1,float16,fp8,0,0.13175466656684875
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,64,1,128,1,float16,float16,0,0.1307253340880076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,64,2,128,1,float16,float16,0,0.13226667046546936
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,64,64,128,1,float16,fp8,0,0.1365333298842112
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,64,64,128,1,fp8,fp8,0,0.15581867098808289
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,64,2,128,1,float16,fp8,0,0.13191999991734824
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,64,4,128,1,float16,float16,0,0.13124799728393555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,64,2,128,1,fp8,fp8,0,0.14387200276056925
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,64,8,128,1,float16,float16,0,0.13192533453305563
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,64,4,128,1,float16,fp8,0,0.13175466656684875
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,64,4,128,1,fp8,fp8,0,0.14387200276056925
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,64,8,128,1,fp8,fp8,0,0.1460853318373362
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,64,8,128,1,float16,fp8,0,0.13175466656684875
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,64,64,128,1,fp8,fp8,0,0.08295999964078267
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,64,64,128,1,float16,float16,0,0.07614933451016744
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,64,1,128,1,float16,float16,0,0.07441066702206929
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,64,64,128,1,float16,fp8,0,0.07645333309968312
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,64,1,128,1,float16,fp8,0,0.07406933108965556
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,64,2,128,1,float16,float16,0,0.07441066702206929
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,64,1,128,1,fp8,fp8,0,0.08020799855391185
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,64,2,128,1,fp8,fp8,0,0.08089600006739299
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,64,2,128,1,float16,fp8,0,0.07441066702206929
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,64,4,128,1,float16,float16,0,0.07543466488520305
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,64,4,128,1,float16,fp8,0,0.07441600163777669
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,64,4,128,1,fp8,fp8,0,0.08224533498287201
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,64,8,128,1,float16,fp8,0,0.0743999977906545
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,64,64,128,1,float16,float16,0,0.046426668763160706
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,64,8,128,1,float16,float16,0,0.07441066702206929
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,64,64,128,1,fp8,fp8,0,0.04916266600290934
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,64,64,128,1,float16,fp8,0,0.04538666705290476
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,64,1,128,1,fp8,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,64,2,128,1,float16,float16,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,64,2,128,1,fp8,fp8,0,0.047456001242001854
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,64,1,128,1,float16,float16,0,0.045050665736198425
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,64,8,128,1,fp8,fp8,0,0.08021866778532664
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,64,4,128,1,float16,float16,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,64,2,128,1,float16,fp8,0,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,64,4,128,1,fp8,fp8,0,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,64,1,128,1,float16,fp8,0,0.04539200166861216
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,64,64,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,64,4,128,1,float16,fp8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,64,64,128,1,float16,fp8,0,0.0310506671667099
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,64,64,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,64,8,128,1,float16,fp8,0,0.04574400186538696
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,64,1,128,1,float16,fp8,0,0.029696000119050343
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,64,8,128,1,float16,float16,0,0.045423999428749084
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,64,2,128,1,float16,fp8,0,0.02934933453798294
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,64,8,128,1,fp8,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,64,2,128,1,float16,float16,0,0.02898666759332021
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,64,2,128,1,fp8,fp8,0,0.031082667410373688
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,64,4,128,1,fp8,fp8,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,64,1,128,1,fp8,fp8,0,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,64,4,128,1,float16,float16,0,0.029696000119050343
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,64,1,128,1,float16,float16,0,0.029002666473388672
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,64,4,128,1,float16,fp8,0,0.030031998952229817
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,64,8,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,64,8,128,1,float16,float16,0,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,64,64,128,1,float16,float16,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,64,64,128,1,fp8,fp8,0,0.021168000996112823
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,64,8,128,1,fp8,fp8,0,0.029365333418051403
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,64,64,128,1,float16,fp8,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,64,1,128,1,float16,fp8,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,64,2,128,1,float16,fp8,0,0.02083733429511388
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,64,1,128,1,fp8,fp8,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,64,2,128,1,fp8,fp8,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,64,2,128,1,float16,float16,0,0.020831999679406483
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,64,4,128,1,fp8,fp8,0,0.02080533280968666
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,64,4,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,64,8,128,1,float16,float16,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,64,4,128,1,float16,fp8,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,64,1,128,1,float16,float16,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,64,8,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,64,8,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,64,64,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,64,1,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,64,64,128,1,float16,float16,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,64,64,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,64,2,128,1,float16,fp8,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,64,1,128,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,64,2,128,1,float16,float16,0,0.018090666582187016
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,64,2,128,1,fp8,fp8,0,0.016741332908471424
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,64,4,128,1,float16,float16,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,64,8,128,1,float16,float16,0,0.018432000031073887
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,64,64,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,64,8,128,1,fp8,fp8,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,64,4,128,1,float16,fp8,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,64,64,128,1,float16,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,64,8,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,64,1,128,1,fp8,fp8,0,0.01878400022784869
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,64,4,128,1,fp8,fp8,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,64,64,128,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,64,1,128,1,float16,float16,0,0.01674666628241539
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,64,2,128,1,float16,float16,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,64,1,128,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,64,1,128,1,fp8,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,64,2,128,1,float16,fp8,0,0.016384000579516094
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,64,4,128,1,float16,float16,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,64,4,128,1,fp8,fp8,0,0.016735999534527462
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,64,4,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,64,8,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,64,8,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,64,8,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,64,2,128,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,32,1,128,1,fp8,fp8,0,11.996660868326822
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,32,1,128,1,float16,float16,0,15.645866394042969
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,32,2,128,1,fp8,fp8,0,11.972948710123697
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,32,2,128,1,float16,float16,0,15.55349858601888
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,32,2,128,1,float16,fp8,0,15.829845428466797
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,32,1,128,1,float16,fp8,0,15.54415512084961
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,32,4,128,1,float16,fp8,0,15.512405395507812
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,32,4,128,1,float16,float16,0,15.81106694539388
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,32,1,128,1,float16,float16,0,7.884458541870117
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,32,4,128,1,fp8,fp8,0,12.01800537109375
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,32,1,128,1,float16,fp8,0,7.576576232910156
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,32,32,128,1,float16,float16,0,8.165536244710287
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,32,32,128,1,float16,fp8,0,7.957674662272136
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,32,8,128,1,float16,float16,0,15.803029378255209
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,32,8,128,1,float16,fp8,0,15.556234995524088
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,32,8,128,1,fp8,fp8,0,12.152661641438803
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,32,32,128,1,fp8,fp8,0,6.336858749389648
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,32,1,128,1,fp8,fp8,0,6.0936533610026045
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,32,2,128,1,float16,float16,0,7.763973236083984
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,32,2,128,1,float16,fp8,0,7.595354715983073
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,32,2,128,1,fp8,fp8,0,6.10865592956543
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,32,4,128,1,float16,fp8,0,7.4071095784505205
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,32,4,128,1,float16,float16,0,7.667722702026367
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,32,4,128,1,fp8,fp8,0,6.131877263387044
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,32,8,128,1,float16,float16,0,7.660213470458984
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,32,8,128,1,float16,fp8,0,7.76533317565918
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,32,1,128,1,float16,float16,0,3.772245407104492
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,32,1,128,1,float16,fp8,0,3.7712319691975913
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,32,1,128,1,fp8,fp8,0,3.2107518513997397
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,32,8,128,1,fp8,fp8,0,6.1641387939453125
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,32,32,128,1,float16,float16,0,3.832325299580892
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,32,2,128,1,float16,float16,0,3.7756694157918296
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,32,32,128,1,float16,fp8,0,3.914933204650879
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,32,2,128,1,float16,fp8,0,3.755178769429525
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,32,2,128,1,fp8,fp8,0,3.213141441345215
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,32,32,128,1,fp8,fp8,0,3.3116159439086914
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,32,4,128,1,float16,float16,0,3.688960075378418
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,32,4,128,1,float16,fp8,0,3.769189198811849
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,32,4,128,1,fp8,fp8,0,3.2189385096232095
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,32,1,128,1,float16,float16,0,2.0084053675333657
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,32,8,128,1,float16,float16,0,3.7350241343180337
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,32,8,128,1,float16,fp8,0,3.8207200368245444
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,32,32,128,1,float16,float16,0,2.035029411315918
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,32,8,128,1,fp8,fp8,0,3.23635196685791
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,32,1,128,1,float16,fp8,0,2.001237392425537
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,32,32,128,1,float16,fp8,0,2.055845260620117
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,32,32,128,1,fp8,fp8,0,1.8126506805419922
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,32,1,128,1,fp8,fp8,0,1.7604319254557292
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,32,2,128,1,float16,float16,0,1.9974826176961262
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,32,2,128,1,float16,fp8,0,1.9978133837382
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,32,2,128,1,fp8,fp8,0,1.7638452847798665
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,32,4,128,1,float16,float16,0,2.022058645884196
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,32,4,128,1,fp8,fp8,0,1.766218662261963
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,32,4,128,1,float16,fp8,0,2.0084053675333657
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,32,8,128,1,float16,float16,0,1.999194622039795
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,32,8,128,1,float16,fp8,0,2.026495933532715
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,32,8,128,1,fp8,fp8,0,1.7699839274088542
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,32,1,128,1,float16,float16,0,9.008464177449545
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,32,1,128,1,float16,fp8,0,8.556383768717447
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,32,1,128,1,fp8,fp8,0,7.187114715576172
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,32,2,128,1,float16,fp8,0,8.955904006958008
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,32,2,128,1,float16,float16,0,8.583733240763346
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,32,2,128,1,fp8,fp8,0,7.188138961791992
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,32,4,128,1,float16,fp8,0,8.509791692097982
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,32,4,128,1,float16,float16,0,9.26907730102539
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,32,1,128,1,float16,float16,0,4.528298695882161
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,32,4,128,1,fp8,fp8,0,7.198378880818685
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,32,8,128,1,float16,float16,0,8.969210942586264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,32,8,128,1,float16,fp8,0,9.298773447672525
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,32,8,128,1,fp8,fp8,0,7.248378753662109
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,32,32,128,1,float16,float16,0,4.648805300394694
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,32,1,128,1,float16,fp8,0,4.377770741780599
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,32,32,128,1,float16,fp8,0,4.592464129130046
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,32,1,128,1,fp8,fp8,0,3.685551961263021
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,32,32,128,1,fp8,fp8,0,3.8592907587687173
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,32,2,128,1,float16,float16,0,4.276565233866374
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,32,2,128,1,float16,fp8,0,4.546383857727051
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,32,2,128,1,fp8,fp8,0,3.6968053181966147
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,32,4,128,1,float16,float16,0,4.2673492431640625
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,32,4,128,1,float16,fp8,0,4.445365269978841
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,32,4,128,1,fp8,fp8,0,3.7097867329915366
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,32,8,128,1,float16,float16,0,4.28441588083903
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,32,8,128,1,float16,fp8,0,4.297727902730306
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,32,1,128,1,float16,float16,0,2.2485334078470864
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,32,1,128,1,float16,fp8,0,2.2234400113423667
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,32,1,128,1,fp8,fp8,0,1.964037259419759
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,32,8,128,1,fp8,fp8,0,3.7309385935465493
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,32,32,128,1,float16,fp8,0,2.331648031870524
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,32,32,128,1,float16,float16,0,2.337776025136312
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,32,2,128,1,float16,float16,0,2.236245314280192
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,32,2,128,1,float16,fp8,0,2.255018711090088
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,32,2,128,1,fp8,fp8,0,1.9684747060139973
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,32,32,128,1,fp8,fp8,0,2.0415093104044595
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,32,4,128,1,float16,float16,0,2.2342453002929688
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,32,4,128,1,fp8,fp8,0,1.9725546836853027
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,32,4,128,1,float16,fp8,0,2.2366080284118652
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,32,8,128,1,float16,fp8,0,2.257754643758138
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,32,8,128,1,float16,float16,0,2.2734506924947104
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,32,8,128,1,fp8,fp8,0,1.9790560404459636
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,32,32,128,1,float16,float16,0,1.2569653193155925
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,32,32,128,1,float16,fp8,0,1.2723360061645508
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,32,1,128,1,float16,float16,0,1.228117307027181
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,32,1,128,1,float16,fp8,0,1.2250453631083171
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,32,1,128,1,fp8,fp8,0,1.0992639859517415
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,32,2,128,1,float16,float16,0,1.2407466570536296
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,32,2,128,1,float16,fp8,0,1.2380106449127197
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,32,2,128,1,fp8,fp8,0,1.1043519973754883
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,32,4,128,1,float16,float16,0,1.2298293113708496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,32,32,128,1,fp8,fp8,0,1.1395466327667236
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,32,4,128,1,float16,fp8,0,1.2356266975402832
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,32,4,128,1,fp8,fp8,0,1.1047253608703613
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,32,8,128,1,float16,float16,0,1.2339200178782146
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,32,8,128,1,fp8,fp8,0,1.1098399957021077
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,32,8,128,1,float16,fp8,0,1.2526933352152507
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,32,1,128,1,float16,float16,0,6.510079701741536
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,32,1,128,1,float16,fp8,0,6.437711715698242
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,32,1,128,1,fp8,fp8,0,5.216085433959961
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,32,2,128,1,float16,float16,0,6.460757573445638
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,32,2,128,1,float16,fp8,0,6.0496266682942705
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,32,2,128,1,fp8,fp8,0,5.224282582600911
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,32,4,128,1,float16,float16,0,6.570325215657552
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,32,4,128,1,float16,fp8,0,6.574565251668294
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,32,1,128,1,float16,float16,0,3.1264425913492837
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,32,4,128,1,fp8,fp8,0,5.252607981363933
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,32,8,128,1,float16,float16,0,6.434474945068359
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,32,8,128,1,float16,fp8,0,6.309386571248372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,32,8,128,1,fp8,fp8,0,5.286399841308594
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,32,1,128,1,float16,fp8,0,3.1230293909708657
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,32,32,128,1,float16,float16,0,3.206666628519694
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,32,32,128,1,float16,fp8,0,3.262634595235189
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,32,1,128,1,fp8,fp8,0,2.7060906092325845
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,32,32,128,1,fp8,fp8,0,2.8470398585001626
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,32,2,128,1,float16,float16,0,3.1462507247924805
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,32,2,128,1,float16,fp8,0,3.0947093963623047
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,32,2,128,1,fp8,fp8,0,2.711210568745931
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,32,4,128,1,fp8,fp8,0,2.7149651845296225
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,32,4,128,1,float16,fp8,0,3.0923147201538086
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,32,4,128,1,float16,float16,0,3.1730454762776694
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,32,8,128,1,float16,float16,0,3.099818547566732
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,32,8,128,1,float16,fp8,0,3.149989446004232
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,32,1,128,1,float16,float16,0,1.6279892921447754
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,32,1,128,1,float16,fp8,0,1.6539360682169597
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,32,1,128,1,fp8,fp8,0,1.453887939453125
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,32,32,128,1,float16,float16,0,1.692325274149577
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,32,8,128,1,fp8,fp8,0,2.733226776123047
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,32,32,128,1,float16,fp8,0,1.7129759788513184
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,32,2,128,1,float16,float16,0,1.6481332778930664
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,32,2,128,1,float16,fp8,0,1.6354986826578777
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,32,2,128,1,fp8,fp8,0,1.4576692581176758
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,32,32,128,1,fp8,fp8,0,1.518602689107259
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,32,4,128,1,float16,float16,0,1.633109410603841
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,32,4,128,1,float16,fp8,0,1.6409600575764973
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,32,4,128,1,fp8,fp8,0,1.4576853116353352
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,32,8,128,1,float16,float16,0,1.6505173047383626
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,32,8,128,1,float16,fp8,0,1.6638186772664387
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,32,8,128,1,fp8,fp8,0,1.469098726908366
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,32,32,128,1,float16,fp8,0,0.9494187037150065
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,32,32,128,1,float16,float16,0,0.9470293521881104
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,32,1,128,1,float16,float16,0,0.9086293379465739
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,32,1,128,1,float16,fp8,0,0.9127146402994791
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,32,1,128,1,fp8,fp8,0,0.8272266387939453
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,32,32,128,1,fp8,fp8,0,0.8581173419952393
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,32,2,128,1,fp8,fp8,0,0.8272319634755453
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,32,2,128,1,float16,float16,0,0.9173546632130941
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,32,2,128,1,float16,fp8,0,0.912384033203125
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,32,4,128,1,float16,float16,0,0.9210879802703857
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,32,4,128,1,float16,fp8,0,0.9159626960754395
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,32,4,128,1,fp8,fp8,0,0.8292693297068278
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,32,8,128,1,float16,float16,0,0.9173333644866943
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,32,8,128,1,float16,fp8,0,0.9200692971547445
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,32,8,128,1,fp8,fp8,0,0.8337226708730062
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,32,1,128,1,float16,float16,0,8.650927861531576
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,32,1,128,1,float16,fp8,0,8.613898595174154
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,32,1,128,1,fp8,fp8,0,7.044095993041992
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,32,2,128,1,float16,float16,0,8.791247685750326
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,32,2,128,1,float16,fp8,0,8.553647994995117
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,32,2,128,1,fp8,fp8,0,7.060159683227539
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,32,4,128,1,float16,float16,0,9.064794540405273
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,32,4,128,1,float16,fp8,0,8.805546442667643
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,32,1,128,1,float16,float16,0,4.179264068603516
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,32,4,128,1,fp8,fp8,0,7.083178838094075
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,32,1,128,1,float16,fp8,0,4.197733243306478
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,32,8,128,1,float16,float16,0,8.783360163370768
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,32,8,128,1,float16,fp8,0,8.478890736897787
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,32,8,128,1,fp8,fp8,0,7.125333150227864
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,32,32,128,1,float16,float16,0,4.455264091491699
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,32,32,128,1,float16,fp8,0,4.503055890401204
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,32,32,128,1,fp8,fp8,0,3.812522570292155
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,32,1,128,1,fp8,fp8,0,3.5785385767618814
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,32,2,128,1,float16,float16,0,4.0881547927856445
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,32,2,128,1,float16,fp8,0,4.219567934672038
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,32,2,128,1,fp8,fp8,0,3.596970558166504
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,32,4,128,1,float16,float16,0,4.20250129699707
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,32,4,128,1,float16,fp8,0,4.128773371378581
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,32,4,128,1,fp8,fp8,0,3.6024319330851235
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,32,8,128,1,float16,float16,0,4.362069447835286
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,32,1,128,1,float16,float16,0,2.1036532719930015
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,32,1,128,1,float16,fp8,0,2.1091039975484214
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,32,8,128,1,float16,fp8,0,4.320085207621257
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,32,8,128,1,fp8,fp8,0,3.6259892781575522
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,32,1,128,1,fp8,fp8,0,1.8715306917826335
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,32,32,128,1,float16,float16,0,2.2002293268839517
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,32,2,128,1,float16,float16,0,2.108448028564453
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,32,2,128,1,float16,fp8,0,2.1152426401774087
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,32,2,128,1,fp8,fp8,0,1.874608039855957
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,32,32,128,1,float16,fp8,0,2.259455998738607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,32,32,128,1,fp8,fp8,0,1.9834879239400227
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,32,4,128,1,float16,float16,0,2.1165760358174643
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,32,4,128,1,float16,fp8,0,2.107738653818766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,32,4,128,1,fp8,fp8,0,1.8790133794148762
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,32,8,128,1,float16,float16,0,2.126512050628662
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,32,8,128,1,float16,fp8,0,2.137770652770996
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,32,32,128,1,float16,float16,0,1.1881813208262126
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,32,8,128,1,fp8,fp8,0,1.892858664194743
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,32,32,128,1,float16,fp8,0,1.1871626377105713
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,32,1,128,1,float16,float16,0,1.1238400141398113
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,32,1,128,1,float16,fp8,0,1.125205357869466
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,32,1,128,1,fp8,fp8,0,1.0156479676564534
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,32,2,128,1,float16,float16,0,1.1303253173828125
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,32,2,128,1,float16,fp8,0,1.1433013280232747
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,32,2,128,1,fp8,fp8,0,1.019056002298991
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,32,4,128,1,float16,float16,0,1.1306613286336262
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,32,32,128,1,fp8,fp8,0,1.0678613185882568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,32,4,128,1,float16,fp8,0,1.1306613286336262
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,32,4,128,1,fp8,fp8,0,1.0217866897583008
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,32,1,128,1,float16,float16,0,0.6367626587549845
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,32,1,128,1,float16,fp8,0,0.6401706536610922
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,32,8,128,1,float16,float16,0,1.1364693641662598
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,32,8,128,1,float16,fp8,0,1.1439786752065022
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,32,8,128,1,fp8,fp8,0,1.0292906761169434
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,32,32,128,1,float16,float16,0,0.667306661605835
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,32,32,128,1,float16,fp8,0,0.6703786849975586
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,32,32,128,1,fp8,fp8,0,0.6150826613108317
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,32,1,128,1,fp8,fp8,0,0.5905119975407919
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,32,2,128,1,float16,float16,0,0.6388159990310669
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,32,2,128,1,float16,fp8,0,0.6415359973907471
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,32,2,128,1,fp8,fp8,0,0.5911680062611898
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,32,4,128,1,float16,float16,0,0.6493920087814331
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,32,4,128,1,float16,fp8,0,0.649727980295817
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,32,4,128,1,fp8,fp8,0,0.5918613274892172
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,32,8,128,1,float16,float16,0,0.6466720104217529
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,32,8,128,1,float16,fp8,0,0.6493866840998331
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,32,8,128,1,fp8,fp8,0,0.5963093439737955
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,32,1,128,1,float16,float16,0,5.051402727762858
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,32,1,128,1,float16,fp8,0,5.131445248921712
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,32,1,128,1,fp8,fp8,0,4.401007970174153
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,32,2,128,1,fp8,fp8,0,4.399279912312825
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,32,2,128,1,float16,fp8,0,5.06930669148763
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,32,2,128,1,float16,float16,0,5.028698603312175
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,32,4,128,1,float16,fp8,0,5.027514775594075
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,32,4,128,1,float16,float16,0,5.047989209493001
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,32,1,128,1,float16,float16,0,2.497536023457845
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,32,4,128,1,fp8,fp8,0,4.41105588277181
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,32,8,128,1,float16,float16,0,5.1954240798950195
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,32,1,128,1,float16,fp8,0,2.4958292643229165
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,32,8,128,1,fp8,fp8,0,4.46445878346761
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,32,8,128,1,float16,fp8,0,5.083493232727051
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,32,32,128,1,float16,fp8,0,2.7173439661661782
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,32,32,128,1,float16,float16,0,2.6671787897745767
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,32,1,128,1,fp8,fp8,0,2.244095961252848
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,32,32,128,1,fp8,fp8,0,2.4205652872721353
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,32,2,128,1,float16,float16,0,2.516815980275472
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,32,2,128,1,float16,fp8,0,2.504021326700846
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,32,2,128,1,fp8,fp8,0,2.2502454121907554
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,32,4,128,1,float16,float16,0,2.5598133405049643
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,32,4,128,1,float16,fp8,0,2.550607999165853
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,32,4,128,1,fp8,fp8,0,2.2615040143330893
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,32,8,128,1,float16,float16,0,2.5465332667032876
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,32,8,128,1,float16,fp8,0,2.5424267450968423
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,32,1,128,1,float16,float16,0,1.3192533651987712
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,32,1,128,1,float16,fp8,0,1.308672030766805
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,32,8,128,1,fp8,fp8,0,2.2864160537719727
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,32,32,128,1,float16,float16,0,1.3806880315144856
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,32,1,128,1,fp8,fp8,0,1.1851253509521484
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,32,32,128,1,float16,fp8,0,1.4045813878377278
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,32,2,128,1,float16,float16,0,1.3076480229695637
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,32,2,128,1,float16,fp8,0,1.3243733247121174
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,32,2,128,1,fp8,fp8,0,1.1885226567586262
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,32,32,128,1,fp8,fp8,0,1.2750506401062012
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,32,4,128,1,float16,fp8,0,1.3134506543477376
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,32,4,128,1,float16,float16,0,1.3192533651987712
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,32,4,128,1,fp8,fp8,0,1.1912533442179363
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,32,8,128,1,float16,float16,0,1.3277866840362549
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,32,8,128,1,float16,fp8,0,1.328810691833496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,32,8,128,1,fp8,fp8,0,1.2042293548583984
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,32,32,128,1,float16,float16,0,0.7482026418050131
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,32,1,128,1,float16,float16,0,0.7127412954966227
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,32,32,128,1,float16,fp8,0,0.7669760386149088
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,32,1,128,1,float16,fp8,0,0.7137227058410645
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,32,1,128,1,fp8,fp8,0,0.6553653478622437
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,32,2,128,1,float16,float16,0,0.7127093474070231
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,32,2,128,1,fp8,fp8,0,0.6563839912414551
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,32,2,128,1,float16,fp8,0,0.719871997833252
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,32,32,128,1,fp8,fp8,0,0.6970132986704508
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,32,4,128,1,float16,float16,0,0.718506654103597
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,32,4,128,1,float16,fp8,0,0.7256746292114258
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,32,4,128,1,fp8,fp8,0,0.657039999961853
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,32,8,128,1,float16,float16,0,0.7219200134277344
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,32,1,128,1,float16,fp8,0,0.41659732659657794
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,32,1,128,1,float16,float16,0,0.41626131534576416
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,32,8,128,1,float16,fp8,0,0.7239680290222168
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,32,8,128,1,fp8,fp8,0,0.6638933420181274
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,32,32,128,1,float16,float16,0,0.4360533157984416
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,32,1,128,1,fp8,fp8,0,0.3906773328781128
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,32,32,128,1,float16,fp8,0,0.442197322845459
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,32,32,128,1,fp8,fp8,0,0.4123306671778361
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,32,2,128,1,float16,float16,0,0.4159146547317505
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,32,2,128,1,float16,fp8,0,0.4189866781234741
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,32,2,128,1,fp8,fp8,0,0.389631986618042
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,32,4,128,1,fp8,fp8,0,0.3921866814295451
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,32,4,128,1,float16,fp8,0,0.41998934745788574
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,32,4,128,1,float16,float16,0,0.4196693499883016
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,32,8,128,1,float16,float16,0,0.42205333709716797
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,32,8,128,1,float16,fp8,0,0.42583465576171875
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,32,8,128,1,fp8,fp8,0,0.3949173291524251
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,32,1,128,1,float16,float16,0,5.181952158610026
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,32,1,128,1,fp8,fp8,0,4.693162600199382
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,32,1,128,1,float16,fp8,0,5.29526933034261
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,32,2,128,1,float16,fp8,0,5.153621355692546
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,32,2,128,1,float16,float16,0,5.362351735432942
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,32,2,128,1,fp8,fp8,0,4.7143252690633135
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,32,4,128,1,float16,float16,0,5.34614372253418
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,32,4,128,1,float16,fp8,0,5.404325485229492
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,32,1,128,1,float16,float16,0,2.5625599225362143
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,32,4,128,1,fp8,fp8,0,4.760575930277507
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,32,1,128,1,float16,fp8,0,2.5618346532185874
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,32,8,128,1,float16,float16,0,5.281125386555989
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,32,8,128,1,float16,fp8,0,5.385904312133789
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,32,32,128,1,float16,float16,0,2.8002986907958984
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,32,8,128,1,fp8,fp8,0,4.7810773849487305
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,32,32,128,1,float16,fp8,0,2.805760065714518
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,32,32,128,1,fp8,fp8,0,2.5980587005615234
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,32,1,128,1,fp8,fp8,0,2.3775359789530435
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,32,2,128,1,float16,float16,0,2.5683627128601074
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,32,2,128,1,float16,fp8,0,2.5799733797709146
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,32,2,128,1,fp8,fp8,0,2.377903938293457
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,32,4,128,1,float16,fp8,0,2.5779147148132324
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,32,4,128,1,float16,float16,0,2.609322706858317
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,32,4,128,1,fp8,fp8,0,2.3860905965169272
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,32,8,128,1,float16,float16,0,2.610688050587972
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,32,1,128,1,float16,float16,0,1.3284693559010823
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,32,8,128,1,float16,fp8,0,2.6511359214782715
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,32,1,128,1,float16,fp8,0,1.314469337463379
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,32,1,128,1,fp8,fp8,0,1.2301653226216633
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,32,8,128,1,fp8,fp8,0,2.40993595123291
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,32,32,128,1,float16,float16,0,1.4267733891805012
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,32,32,128,1,float16,fp8,0,1.438037395477295
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,32,2,128,1,float16,float16,0,1.333957354227702
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,32,2,128,1,float16,fp8,0,1.3424639701843262
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,32,2,128,1,fp8,fp8,0,1.2291466395060222
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,32,32,128,1,fp8,fp8,0,1.34006929397583
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,32,4,128,1,float16,float16,0,1.3305173714955647
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,32,4,128,1,fp8,fp8,0,1.2413973013559978
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,32,4,128,1,float16,fp8,0,1.3315412998199463
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,32,8,128,1,float16,float16,0,1.3370025952657063
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,32,1,128,1,float16,float16,0,0.7014400164286295
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,32,8,128,1,float16,fp8,0,1.3574879964192708
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,32,1,128,1,float16,fp8,0,0.7017813523610433
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,32,8,128,1,fp8,fp8,0,1.2596960067749023
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,32,32,128,1,float16,fp8,0,0.7615146636962891
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,32,32,128,1,float16,float16,0,0.7471840381622314
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,32,1,128,1,fp8,fp8,0,0.6567413409550985
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,32,32,128,1,fp8,fp8,0,0.7147573630015055
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,32,2,128,1,float16,float16,0,0.7037973403930664
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,32,2,128,1,float16,fp8,0,0.708954652150472
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,32,2,128,1,fp8,fp8,0,0.6587680180867513
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,32,4,128,1,float16,float16,0,0.7161173025767008
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,32,4,128,1,float16,fp8,0,0.7089440027872721
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,32,4,128,1,fp8,fp8,0,0.6625279982884725
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,32,8,128,1,float16,float16,0,0.7092853387196859
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,32,8,128,1,fp8,fp8,0,0.6713973681131998
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,32,8,128,1,float16,fp8,0,0.7167999744415283
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,32,32,128,1,float16,float16,0,0.4193280140558879
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,32,32,128,1,float16,fp8,0,0.42444801330566406
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,32,1,128,1,float16,float16,0,0.39153067270914715
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,32,1,128,1,float16,fp8,0,0.3928746779759725
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,32,32,128,1,fp8,fp8,0,0.4034506479899089
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,32,1,128,1,fp8,fp8,0,0.3705173333485921
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,32,2,128,1,float16,float16,0,0.39288000265757245
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,32,2,128,1,float16,fp8,0,0.39458131790161133
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,32,2,128,1,fp8,fp8,0,0.3742826779683431
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,32,4,128,1,fp8,fp8,0,0.37563733259836835
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,32,4,128,1,float16,float16,0,0.39628799756368
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,32,4,128,1,float16,fp8,0,0.3993599812189738
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,32,8,128,1,float16,float16,0,0.4037919839223226
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,32,8,128,1,float16,fp8,0,0.40277334054311115
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,32,1,128,1,float16,float16,0,0.24064000447591147
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,32,1,128,1,float16,fp8,0,0.24166399240493774
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,32,8,128,1,fp8,fp8,0,0.37768534819285077
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,32,32,128,1,float16,float16,0,0.2553439935048421
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,32,1,128,1,fp8,fp8,0,0.22732800245285034
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,32,32,128,1,fp8,fp8,0,0.24507200717926025
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,32,32,128,1,float16,fp8,0,0.2596106727917989
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,32,2,128,1,float16,float16,0,0.24132267634073892
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,32,2,128,1,float16,fp8,0,0.24235200881958008
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,32,2,128,1,fp8,fp8,0,0.22735466559727988
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,32,4,128,1,float16,float16,0,0.23994133869806925
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,32,4,128,1,float16,fp8,0,0.24337067206700644
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,32,4,128,1,fp8,fp8,0,0.22869332631429037
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,32,8,128,1,float16,float16,0,0.2426933248837789
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,32,8,128,1,float16,fp8,0,0.24337067206700644
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,32,8,128,1,fp8,fp8,0,0.23244800170262656
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,32,1,128,1,fp8,fp8,0,3.135658582051595
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,32,1,128,1,float16,float16,0,3.3218612670898438
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,32,1,128,1,float16,fp8,0,3.320842742919922
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,32,2,128,1,float16,fp8,0,3.4102614720662436
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,32,2,128,1,fp8,fp8,0,3.1445334752400718
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,32,2,128,1,float16,float16,0,3.3222080866495767
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,32,4,128,1,float16,float16,0,3.378517468770345
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,32,4,128,1,float16,fp8,0,3.318448066711426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,32,1,128,1,float16,float16,0,1.6709920565287273
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,32,4,128,1,fp8,fp8,0,3.1687679290771484
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,32,1,128,1,float16,fp8,0,1.6709920565287273
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,32,8,128,1,float16,float16,0,3.369466781616211
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,32,32,128,1,float16,float16,0,1.8105866114298503
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,32,8,128,1,float16,fp8,0,3.381765365600586
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,32,8,128,1,fp8,fp8,0,3.242495854695638
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,32,32,128,1,float16,fp8,0,1.8408320744832356
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,32,32,128,1,fp8,fp8,0,1.759424050649007
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,32,1,128,1,fp8,fp8,0,1.5921440124511719
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,32,2,128,1,fp8,fp8,0,1.594879945119222
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,32,2,128,1,float16,float16,0,1.6762933731079102
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,32,2,128,1,float16,fp8,0,1.6742453575134277
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,32,4,128,1,float16,float16,0,1.6814079284667969
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,32,4,128,1,float16,fp8,0,1.7041066487630208
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,32,4,128,1,fp8,fp8,0,1.609557310740153
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,32,8,128,1,float16,float16,0,1.7122987111409504
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,32,1,128,1,float16,float16,0,0.8806347052256266
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,32,8,128,1,float16,fp8,0,1.711957295735677
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,32,1,128,1,float16,fp8,0,0.8768906593322754
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,32,1,128,1,fp8,fp8,0,0.8337066968282064
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,32,8,128,1,fp8,fp8,0,1.6269653638203938
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,32,32,128,1,float16,fp8,0,0.9538559913635254
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,32,2,128,1,float16,float16,0,0.8837119738260905
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,32,32,128,1,float16,float16,0,0.9405439694722494
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,32,32,128,1,fp8,fp8,0,0.9127253691355387
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,32,2,128,1,float16,fp8,0,0.8762079874674479
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,32,2,128,1,fp8,fp8,0,0.8367679913838705
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,32,4,128,1,float16,float16,0,0.8806400299072266
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,32,4,128,1,float16,fp8,0,0.8792800108591715
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,32,4,128,1,fp8,fp8,0,0.834282636642456
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,32,8,128,1,float16,float16,0,0.8991040388743082
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,32,8,128,1,float16,fp8,0,0.8925866285959879
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,32,8,128,1,fp8,fp8,0,0.8482133547465006
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,32,32,128,1,float16,float16,0,0.5053600072860718
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,32,1,128,1,float16,float16,0,0.46882132689158124
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,32,32,128,1,float16,fp8,0,0.5132000048955282
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,32,1,128,1,float16,fp8,0,0.47154664993286133
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,32,1,128,1,fp8,fp8,0,0.4524266719818115
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,32,2,128,1,float16,float16,0,0.4708746671676636
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,32,2,128,1,fp8,fp8,0,0.4517600138982137
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,32,2,128,1,float16,fp8,0,0.47257598241170246
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,32,32,128,1,fp8,fp8,0,0.4910080035527547
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,32,4,128,1,float16,float16,0,0.4722293217976888
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,32,4,128,1,float16,fp8,0,0.47599999109903973
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,32,4,128,1,fp8,fp8,0,0.4534613291422526
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,32,1,128,1,float16,float16,0,0.2653813362121582
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,32,8,128,1,float16,float16,0,0.4838186502456665
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,32,1,128,1,float16,fp8,0,0.2694773276646932
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,32,8,128,1,float16,fp8,0,0.48213334878285724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,32,8,128,1,fp8,fp8,0,0.4599519968032837
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,32,32,128,1,float16,float16,0,0.2892746726671855
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,32,32,128,1,float16,fp8,0,0.29474133253097534
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,32,32,128,1,fp8,fp8,0,0.27938133478164673
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,32,1,128,1,fp8,fp8,0,0.2573546568552653
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,32,2,128,1,float16,float16,0,0.2691466609636943
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,32,2,128,1,float16,fp8,0,0.27084799607594806
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,32,4,128,1,float16,float16,0,0.27187200387318927
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,32,2,128,1,fp8,fp8,0,0.258730669816335
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,32,4,128,1,float16,fp8,0,0.27153066794077557
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,32,4,128,1,fp8,fp8,0,0.2619733413060506
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,32,8,128,1,float16,float16,0,0.2735733389854431
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,32,1,128,1,float16,float16,0,0.1701493263244629
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,32,1,128,1,float16,fp8,0,0.16861865917841592
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,32,1,128,1,fp8,fp8,0,0.1616266667842865
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,32,8,128,1,float16,fp8,0,0.2752853234608968
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,32,32,128,1,float16,fp8,0,0.18107734123865762
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,32,8,128,1,fp8,fp8,0,0.2653866608937581
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,32,32,128,1,float16,float16,0,0.1795413295427958
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,32,32,128,1,fp8,fp8,0,0.17338667313257852
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,32,2,128,1,float16,float16,0,0.16912533839543661
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,32,2,128,1,fp8,fp8,0,0.1611146628856659
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,32,2,128,1,float16,fp8,0,0.16963199774424234
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,32,4,128,1,float16,float16,0,0.1681333382924398
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,32,4,128,1,float16,fp8,0,0.16930667559305826
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,32,4,128,1,fp8,fp8,0,0.1621333360671997
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,32,8,128,1,float16,float16,0,0.17015467087427774
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,32,8,128,1,float16,fp8,0,0.1701493263244629
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,32,8,128,1,fp8,fp8,0,0.16110400358835855
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,32,1,128,1,float16,float16,0,3.807056109110514
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,32,1,128,1,float16,fp8,0,3.8162879943847656
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,32,1,128,1,fp8,fp8,0,3.738117218017578
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,32,2,128,1,float16,float16,0,3.8319787979125977
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,32,2,128,1,float16,fp8,0,3.8664426803588867
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,32,2,128,1,fp8,fp8,0,3.7456159591674805
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,32,4,128,1,float16,fp8,0,3.8398399353027344
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,32,4,128,1,float16,float16,0,3.8381172815958657
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,32,1,128,1,float16,float16,0,1.9017279942830403
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,32,4,128,1,fp8,fp8,0,3.7712265650431314
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,32,1,128,1,float16,fp8,0,1.9034560521443684
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,32,8,128,1,float16,float16,0,3.8944479624430337
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,32,8,128,1,fp8,fp8,0,3.8367573420206704
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,32,8,128,1,float16,fp8,0,3.902970631917318
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,32,32,128,1,float16,float16,0,2.103637377421061
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,32,32,128,1,float16,fp8,0,2.09442138671875
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,32,32,128,1,fp8,fp8,0,2.0814453760782876
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,32,1,128,1,fp8,fp8,0,1.8691466649373372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,32,2,128,1,float16,float16,0,1.9109652837117512
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,32,2,128,1,float16,fp8,0,1.9191466967264812
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,32,2,128,1,fp8,fp8,0,1.8879146575927734
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,32,4,128,1,float16,float16,0,1.928010622660319
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,32,4,128,1,float16,fp8,0,1.9457707405090332
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,32,4,128,1,fp8,fp8,0,1.885525385538737
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,32,8,128,1,float16,float16,0,1.9430452982584636
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,32,8,128,1,float16,fp8,0,1.9505653381347656
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,32,1,128,1,float16,float16,0,0.986624002456665
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,32,1,128,1,float16,fp8,0,0.9760426680246989
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,32,8,128,1,fp8,fp8,0,1.9198187192281086
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,32,1,128,1,fp8,fp8,0,0.9534826278686523
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,32,2,128,1,float16,float16,0,0.9811573028564453
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,32,32,128,1,float16,fp8,0,1.0808320045471191
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,32,32,128,1,float16,float16,0,1.0688640276590984
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,32,2,128,1,float16,fp8,0,0.9924373626708984
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,32,2,128,1,fp8,fp8,0,0.9647786617279053
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,32,32,128,1,fp8,fp8,0,1.0668373107910156
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,32,4,128,1,float16,float16,0,0.9804800351460775
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,32,4,128,1,float16,fp8,0,0.9845813115437826
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,32,4,128,1,fp8,fp8,0,0.9647733370463053
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,32,8,128,1,float16,float16,0,0.9975679715474447
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,32,8,128,1,float16,fp8,0,1.0033546288808186
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,32,32,128,1,float16,float16,0,0.5563733180363973
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,32,8,128,1,fp8,fp8,0,0.9934346675872803
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,32,1,128,1,float16,float16,0,0.5111519893010458
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,32,1,128,1,float16,fp8,0,0.5118399858474731
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,32,32,128,1,float16,fp8,0,0.5676373243331909
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,32,1,128,1,fp8,fp8,0,0.5063626766204834
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,32,2,128,1,float16,float16,0,0.5142186482747396
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,32,2,128,1,float16,fp8,0,0.5135360161463419
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,32,2,128,1,fp8,fp8,0,0.5118293364842733
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,32,4,128,1,float16,float16,0,0.5141973495483398
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,32,4,128,1,float16,fp8,0,0.517632007598877
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,32,32,128,1,fp8,fp8,0,0.5601226488749186
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,32,4,128,1,fp8,fp8,0,0.5125120083491007
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,32,8,128,1,float16,float16,0,0.5232640107472738
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,32,1,128,1,float16,float16,0,0.2763040065765381
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,32,1,128,1,float16,fp8,0,0.27767467498779297
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,32,8,128,1,float16,fp8,0,0.5284106731414795
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,32,8,128,1,fp8,fp8,0,0.5142186482747396
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,32,32,128,1,float16,float16,0,0.3056640028953552
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,32,32,128,1,float16,fp8,0,0.30907734235127765
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,32,32,128,1,fp8,fp8,0,0.3025919993718465
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,32,1,128,1,fp8,fp8,0,0.27426133553187054
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,32,2,128,1,float16,float16,0,0.2776693304379781
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,32,2,128,1,float16,fp8,0,0.2807520031929016
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,32,2,128,1,fp8,fp8,0,0.2776799996693929
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,32,4,128,1,float16,float16,0,0.2827999989191691
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,32,4,128,1,float16,fp8,0,0.28277866045633954
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,32,4,128,1,fp8,fp8,0,0.2787040074666341
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,32,8,128,1,float16,float16,0,0.28484266996383667
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,32,8,128,1,float16,fp8,0,0.28757866223653156
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,32,8,128,1,fp8,fp8,0,0.2827999989191691
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,32,32,128,1,float16,float16,0,0.17561600605646768
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,32,1,128,1,float16,float16,0,0.16076800227165222
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,32,1,128,1,float16,fp8,0,0.15940266847610474
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,32,1,128,1,fp8,fp8,0,0.154448002576828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,32,2,128,1,float16,float16,0,0.1616106629371643
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,32,32,128,1,float16,fp8,0,0.17971199750900269
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,32,2,128,1,float16,fp8,0,0.16059733430544534
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,32,32,128,1,fp8,fp8,0,0.17578667402267456
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,32,2,128,1,fp8,fp8,0,0.1539413332939148
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,32,4,128,1,float16,float16,0,0.16076800227165222
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,32,4,128,1,float16,fp8,0,0.1604213317235311
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,32,4,128,1,fp8,fp8,0,0.15684266885121664
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,32,8,128,1,float16,float16,0,0.1634986698627472
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,32,8,128,1,float16,fp8,0,0.16537066300710043
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,32,8,128,1,fp8,fp8,0,0.16265066464742026
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,32,32,128,1,float16,float16,0,0.1109333336353302
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,32,1,128,1,float16,fp8,0,0.10718400279680888
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,32,1,128,1,float16,float16,0,0.106495996316274
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,32,32,128,1,float16,fp8,0,0.11332266529401143
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,32,1,128,1,fp8,fp8,0,0.10478933652242024
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,32,32,128,1,fp8,fp8,0,0.11196266611417134
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,32,2,128,1,float16,float16,0,0.10717866818110149
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,32,2,128,1,float16,fp8,0,0.10717866818110149
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,32,2,128,1,fp8,fp8,0,0.10513066252072652
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,32,4,128,1,float16,float16,0,0.10611733794212341
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,32,4,128,1,float16,fp8,0,0.10751466949780782
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,32,8,128,1,float16,float16,0,0.10786133011182149
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,32,4,128,1,fp8,fp8,0,0.10444266597429912
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,32,8,128,1,float16,fp8,0,0.10990933577219646
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,32,8,128,1,fp8,fp8,0,0.106495996316274
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,32,1,128,1,float16,float16,0,2.6361172993977866
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,32,1,128,1,fp8,fp8,0,2.6385067303975425
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,32,1,128,1,float16,fp8,0,2.631338596343994
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,32,2,128,1,float16,float16,0,2.643616040547689
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,32,2,128,1,float16,fp8,0,2.654202620188395
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,32,2,128,1,fp8,fp8,0,2.660010655721029
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,32,4,128,1,float16,float16,0,2.690709431966146
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,32,4,128,1,float16,fp8,0,2.6668373743693032
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,32,1,128,1,float16,float16,0,1.3312053680419922
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,32,4,128,1,fp8,fp8,0,2.6712748209635415
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,32,1,128,1,float16,fp8,0,1.3339306513468425
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,32,8,128,1,float16,float16,0,2.6890185674031577
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,32,8,128,1,float16,fp8,0,2.6955254872639975
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,32,8,128,1,fp8,fp8,0,2.7180372873942056
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,32,32,128,1,float16,float16,0,1.462432066599528
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,32,32,128,1,float16,fp8,0,1.4817280769348145
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,32,32,128,1,fp8,fp8,0,1.5059626897176106
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,32,1,128,1,fp8,fp8,0,1.333253304163615
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,32,2,128,1,float16,float16,0,1.3376693725585938
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,32,2,128,1,float16,fp8,0,1.3342773119608562
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,32,2,128,1,fp8,fp8,0,1.3369654019673665
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,32,4,128,1,float16,fp8,0,1.346874713897705
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,32,4,128,1,float16,float16,0,1.3393920262654622
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,32,4,128,1,fp8,fp8,0,1.3516799608866374
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,32,8,128,1,float16,float16,0,1.3714826901753743
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,32,1,128,1,float16,float16,0,0.6823253631591797
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,32,1,128,1,float16,fp8,0,0.6860853036244711
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,32,8,128,1,float16,fp8,0,1.3653333981831868
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,32,8,128,1,fp8,fp8,0,1.3742133776346843
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,32,1,128,1,fp8,fp8,0,0.6925653616587321
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,32,32,128,1,float16,float16,0,0.7488853136698405
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,32,32,128,1,float16,fp8,0,0.7591253121693929
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,32,2,128,1,float16,float16,0,0.6853919823964437
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,32,2,128,1,float16,fp8,0,0.6905173460642496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,32,2,128,1,fp8,fp8,0,0.6884799798329672
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,32,32,128,1,fp8,fp8,0,0.7693706353505453
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,32,4,128,1,float16,fp8,0,0.7004106839497884
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,32,4,128,1,fp8,fp8,0,0.6952906449635824
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,32,4,128,1,float16,float16,0,0.6952906449635824
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,32,8,128,1,fp8,fp8,0,0.7079306443532308
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,32,8,128,1,float16,fp8,0,0.702794631322225
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,32,8,128,1,float16,float16,0,0.7007573445638021
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,32,32,128,1,float16,float16,0,0.39662933349609375
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,32,32,128,1,float16,fp8,0,0.4055039882659912
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,32,1,128,1,float16,float16,0,0.36132800579071045
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,32,1,128,1,float16,fp8,0,0.36402666568756104
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,32,1,128,1,fp8,fp8,0,0.3667626778284709
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,32,2,128,1,float16,float16,0,0.3643786509831746
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,32,2,128,1,float16,fp8,0,0.3653973340988159
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,32,2,128,1,fp8,fp8,0,0.3684693177541097
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,32,4,128,1,float16,float16,0,0.3681120077768962
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,32,32,128,1,fp8,fp8,0,0.4113066593805949
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,32,4,128,1,float16,fp8,0,0.3664160172144572
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,32,1,128,1,float16,float16,0,0.1991680065790812
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,32,4,128,1,fp8,fp8,0,0.36881065368652344
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,32,8,128,1,float16,float16,0,0.3739306529362996
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,32,8,128,1,fp8,fp8,0,0.37460267543792725
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,32,8,128,1,float16,fp8,0,0.3735893170038859
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,32,32,128,1,float16,fp8,0,0.2239146629969279
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,32,32,128,1,float16,float16,0,0.22118399540583292
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,32,32,128,1,fp8,fp8,0,0.22803199291229248
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,32,1,128,1,float16,fp8,0,0.198634664217631
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,32,1,128,1,fp8,fp8,0,0.20241065820058188
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,32,2,128,1,float16,float16,0,0.19849065939585367
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,32,4,128,1,float16,float16,0,0.20053333044052124
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,32,2,128,1,float16,fp8,0,0.1996799906094869
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,32,2,128,1,fp8,fp8,0,0.2039466698964437
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,32,4,128,1,float16,fp8,0,0.20341867208480835
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,32,4,128,1,fp8,fp8,0,0.2082293430964152
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,32,8,128,1,float16,float16,0,0.2053119937578837
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,32,8,128,1,float16,fp8,0,0.20921067396799722
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,32,1,128,1,float16,float16,0,0.11843732992808025
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,32,1,128,1,float16,fp8,0,0.12049600481987
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,32,8,128,1,fp8,fp8,0,0.20991466442743936
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,32,32,128,1,float16,float16,0,0.13056533535321554
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,32,32,128,1,float16,fp8,0,0.13243732849756876
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,32,32,128,1,fp8,fp8,0,0.1336373289426168
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,32,1,128,1,fp8,fp8,0,0.1181013286113739
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,32,2,128,1,float16,float16,0,0.12014933427174886
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,32,2,128,1,float16,fp8,0,0.12012799580891927
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,32,2,128,1,fp8,fp8,0,0.11980266372362773
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,32,4,128,1,float16,float16,0,0.11981333295504253
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,32,4,128,1,float16,fp8,0,0.12049600481987
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,32,4,128,1,fp8,fp8,0,0.11776000261306763
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,32,8,128,1,float16,float16,0,0.12117333213488261
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,32,8,128,1,float16,fp8,0,0.12152000268300374
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,32,1,128,1,float16,float16,0,0.08295466502507527
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,32,1,128,1,float16,fp8,0,0.08225599924723308
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,32,8,128,1,fp8,fp8,0,0.11912533640861511
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,32,2,128,1,float16,float16,0,0.08089600006739299
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,32,1,128,1,fp8,fp8,0,0.08089600006739299
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,32,32,128,1,fp8,fp8,0,0.08567466338475545
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,32,32,128,1,float16,float16,0,0.08430400490760803
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,32,32,128,1,float16,fp8,0,0.08706133564313252
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,32,2,128,1,float16,fp8,0,0.08362666765848796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,32,2,128,1,fp8,fp8,0,0.08195200065771739
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,32,4,128,1,float16,fp8,0,0.08226666847864787
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,32,4,128,1,float16,float16,0,0.08191999793052673
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,32,8,128,1,float16,float16,0,0.08326933284600575
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,32,8,128,1,float16,fp8,0,0.08328533172607422
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,32,4,128,1,fp8,fp8,0,0.08226666847864787
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,32,8,128,1,fp8,fp8,0,0.08328533172607422
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,32,1,128,1,float16,float16,0,2.934272130330404
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,32,1,128,1,float16,fp8,0,2.9267733891805015
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,32,1,128,1,fp8,fp8,0,3.0697813034057617
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,32,2,128,1,float16,fp8,0,3.028138796488444
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,32,2,128,1,float16,float16,0,3.007802645365397
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,32,2,128,1,fp8,fp8,0,3.2715040842692056
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,32,4,128,1,float16,float16,0,3.0254081090291343
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,32,4,128,1,float16,fp8,0,3.0646613438924155
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,32,1,128,1,float16,float16,0,1.4909547170003254
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,32,4,128,1,fp8,fp8,0,3.267754554748535
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,32,1,128,1,float16,fp8,0,1.4875307083129883
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,32,8,128,1,float16,float16,0,3.0848000844319663
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,32,8,128,1,float16,fp8,0,3.0956907272338867
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,32,8,128,1,fp8,fp8,0,3.466917355855306
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,32,32,128,1,float16,fp8,0,1.6607519785563152
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,32,32,128,1,float16,float16,0,1.7088853518168132
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,32,32,128,1,fp8,fp8,0,1.7628159523010254
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,32,1,128,1,fp8,fp8,0,1.5469279289245605
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,32,2,128,1,float16,float16,0,1.5138187408447266
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,32,2,128,1,fp8,fp8,0,1.6208267211914062
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,32,2,128,1,float16,fp8,0,1.509376049041748
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,32,4,128,1,float16,float16,0,1.5144960085550945
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,32,4,128,1,float16,fp8,0,1.5148372650146484
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,32,4,128,1,fp8,fp8,0,1.6300373077392578
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,32,8,128,1,float16,fp8,0,1.537706693013509
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,32,8,128,1,float16,float16,0,1.537706693013509
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,32,1,128,1,float16,float16,0,0.7526346842447916
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,32,1,128,1,float16,fp8,0,0.7536693414052328
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,32,8,128,1,fp8,fp8,0,1.7290186882019043
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,32,1,128,1,fp8,fp8,0,0.7865866820017496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,32,32,128,1,float16,float16,0,0.8434293270111084
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,32,2,128,1,float16,float16,0,0.7598079840342203
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,32,32,128,1,float16,fp8,0,0.8296106656392416
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,32,2,128,1,fp8,fp8,0,0.8122080167134603
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,32,2,128,1,float16,fp8,0,0.764245351155599
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,32,32,128,1,fp8,fp8,0,0.8874666690826416
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,32,4,128,1,float16,float16,0,0.7741440137227377
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,32,4,128,1,float16,fp8,0,0.7673119703928629
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,32,4,128,1,fp8,fp8,0,0.8111680348714193
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,32,8,128,1,float16,float16,0,0.7789226373036703
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,32,8,128,1,float16,fp8,0,0.7741440137227377
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,32,8,128,1,fp8,fp8,0,0.8570720354715983
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,32,32,128,1,float16,float16,0,0.42922667662302655
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,32,1,128,1,float16,float16,0,0.3882346550623576
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,32,32,128,1,float16,fp8,0,0.42445866266886395
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,32,1,128,1,float16,fp8,0,0.38621334234873456
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,32,1,128,1,fp8,fp8,0,0.4031146764755249
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,32,2,128,1,float16,float16,0,0.3885813156763713
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,32,2,128,1,float16,fp8,0,0.3896053234736125
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,32,2,128,1,fp8,fp8,0,0.4119946559270223
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,32,4,128,1,float16,float16,0,0.39288000265757245
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,32,32,128,1,fp8,fp8,0,0.4514133135477702
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,32,4,128,1,float16,fp8,0,0.3945866823196411
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,32,4,128,1,fp8,fp8,0,0.4150613149007161
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,32,1,128,1,float16,float16,0,0.20565332969029745
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,32,1,128,1,float16,fp8,0,0.2051466703414917
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,32,8,128,1,float16,float16,0,0.4007253249486287
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,32,8,128,1,float16,fp8,0,0.39661868413289386
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,32,8,128,1,fp8,fp8,0,0.420693318049113
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,32,32,128,1,float16,float16,0,0.22630933920542398
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,32,32,128,1,float16,fp8,0,0.22426132361094156
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,32,32,128,1,fp8,fp8,0,0.23824532826741537
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,32,1,128,1,fp8,fp8,0,0.21230934063593546
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,32,2,128,1,float16,float16,0,0.206496000289917
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,32,2,128,1,float16,fp8,0,0.2065066695213318
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,32,2,128,1,fp8,fp8,0,0.2146773338317871
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,32,4,128,1,float16,float16,0,0.20787199338277182
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,32,4,128,1,float16,fp8,0,0.20787733793258667
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,32,4,128,1,fp8,fp8,0,0.2164106567700704
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,32,8,128,1,float16,float16,0,0.21368000904719034
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,32,8,128,1,float16,fp8,0,0.21026132504145303
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,32,8,128,1,fp8,fp8,0,0.2205066680908203
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,32,1,128,1,float16,fp8,0,0.11297600467999776
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,32,32,128,1,float16,float16,0,0.12664000193277994
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,32,1,128,1,float16,float16,0,0.11229866743087769
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,32,32,128,1,float16,fp8,0,0.12424533565839131
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,32,1,128,1,fp8,fp8,0,0.1129866639773051
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,32,2,128,1,float16,float16,0,0.11059199770291646
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,32,2,128,1,float16,fp8,0,0.10990400115648906
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,32,32,128,1,fp8,fp8,0,0.13038933277130127
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,32,2,128,1,fp8,fp8,0,0.11502933502197266
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,32,4,128,1,float16,float16,0,0.11227200428644817
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,32,4,128,1,float16,fp8,0,0.11230400204658508
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,32,4,128,1,fp8,fp8,0,0.11468799908955891
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,32,8,128,1,float16,fp8,0,0.11503466963768005
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,32,8,128,1,float16,float16,0,0.11706667145093282
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,32,8,128,1,fp8,fp8,0,0.11980799833933513
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,32,32,128,1,float16,float16,0,0.07338666419188182
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,32,1,128,1,float16,float16,0,0.068271999557813
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,32,32,128,1,float16,fp8,0,0.07372266550858815
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,32,1,128,1,float16,fp8,0,0.06894400219122569
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,32,1,128,1,fp8,fp8,0,0.06860800087451935
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,32,32,128,1,fp8,fp8,0,0.07611733178297679
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,32,2,128,1,float16,fp8,0,0.06794666747252147
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,32,2,128,1,float16,float16,0,0.07028799752394359
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,32,4,128,1,float16,fp8,0,0.07031466563542683
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,32,4,128,1,float16,float16,0,0.06894933183987935
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,32,2,128,1,fp8,fp8,0,0.06795200208822887
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,32,4,128,1,fp8,fp8,0,0.06860800087451935
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,32,8,128,1,float16,float16,0,0.06963199873765309
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,32,8,128,1,float16,fp8,0,0.06860800087451935
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,32,8,128,1,fp8,fp8,0,0.06894933183987935
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,32,1,128,1,float16,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,32,32,128,1,float16,fp8,0,0.041989331444104515
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,32,1,128,1,float16,float16,0,0.040618665516376495
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,32,32,128,1,float16,float16,0,0.043663998444875084
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,32,1,128,1,fp8,fp8,0,0.040618665516376495
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,32,32,128,1,fp8,fp8,0,0.043354665239652
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,32,2,128,1,float16,fp8,0,0.041306667029857635
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,32,2,128,1,fp8,fp8,0,0.040618665516376495
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,32,2,128,1,float16,float16,0,0.03993066648642222
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,32,4,128,1,float16,float16,0,0.03994133323431015
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,32,8,128,1,float16,float16,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,32,4,128,1,float16,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,32,4,128,1,fp8,fp8,0,0.04095466683308283
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,32,8,128,1,float16,fp8,0,0.04233066737651825
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,32,8,128,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,32,1,128,1,fp8,fp8,0,2.7579787572224936
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,32,1,128,1,float16,float16,0,2.5516533851623535
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,32,1,128,1,float16,fp8,0,2.553343931833903
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,32,2,128,1,float16,float16,0,2.6228267351786294
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,32,2,128,1,float16,fp8,0,2.6446773211161294
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,32,2,128,1,fp8,fp8,0,2.929663976033529
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,32,4,128,1,float16,fp8,0,2.651477336883545
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,32,4,128,1,float16,float16,0,2.642944018046061
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,32,1,128,1,float16,float16,0,1.2863146464029949
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,32,4,128,1,fp8,fp8,0,2.926426569620768
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,32,1,128,1,float16,fp8,0,1.2869973182678223
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,32,8,128,1,float16,float16,0,2.7047252655029297
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,32,8,128,1,float16,fp8,0,2.6927785873413086
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,32,8,128,1,fp8,fp8,0,3.141455968221029
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,32,32,128,1,float16,float16,0,1.5069866180419922
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,32,32,128,1,float16,fp8,0,1.4800267219543457
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,32,32,128,1,fp8,fp8,0,1.6061439514160156
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,32,1,128,1,fp8,fp8,0,1.3892265955607097
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,32,2,128,1,float16,float16,0,1.3150293032328289
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,32,2,128,1,float16,fp8,0,1.311743974685669
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,32,2,128,1,fp8,fp8,0,1.4576640129089355
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,32,4,128,1,float16,float16,0,1.3199360370635986
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,32,4,128,1,float16,fp8,0,1.3243680000305176
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,32,4,128,1,fp8,fp8,0,1.471461296081543
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,32,8,128,1,float16,float16,0,1.3448692957560222
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,32,8,128,1,float16,fp8,0,1.3380319277445476
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,32,1,128,1,float16,float16,0,0.6560426553090414
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,32,1,128,1,float16,fp8,0,0.6567253271738688
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,32,8,128,1,fp8,fp8,0,1.559893290201823
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,32,1,128,1,fp8,fp8,0,0.7045119603474935
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,32,2,128,1,float16,float16,0,0.6635306676228842
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,32,2,128,1,float16,fp8,0,0.6669332981109619
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,32,32,128,1,float16,float16,0,0.7461547056833903
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,32,32,128,1,float16,fp8,0,0.7287466526031494
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,32,2,128,1,fp8,fp8,0,0.735578695933024
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,32,32,128,1,fp8,fp8,0,0.8016213575998942
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,32,4,128,1,float16,float16,0,0.6690133412679037
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,32,4,128,1,float16,fp8,0,0.6683306694030762
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,32,4,128,1,fp8,fp8,0,0.734549363454183
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,32,8,128,1,float16,float16,0,0.6830080350240072
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,32,8,128,1,float16,fp8,0,0.6772053241729736
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,32,8,128,1,fp8,fp8,0,0.777562697728475
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,32,1,128,1,float16,float16,0,0.33774932225545246
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,32,32,128,1,float16,float16,0,0.3807520071665446
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,32,32,128,1,float16,fp8,0,0.3729066848754883
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,32,1,128,1,float16,fp8,0,0.3397973378499349
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,32,32,128,1,fp8,fp8,0,0.4089119831720988
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,32,1,128,1,fp8,fp8,0,0.36300798257191974
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,32,2,128,1,float16,float16,0,0.3394560019175212
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,32,2,128,1,fp8,fp8,0,0.37255998452504474
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,32,2,128,1,float16,fp8,0,0.3394613265991211
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,32,4,128,1,float16,float16,0,0.346288005510966
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,32,4,128,1,float16,fp8,0,0.34321598211924237
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,32,4,128,1,fp8,fp8,0,0.37734933694203693
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,32,1,128,1,float16,float16,0,0.1790293256441752
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,32,1,128,1,float16,fp8,0,0.17819199959437051
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,32,8,128,1,float16,float16,0,0.3534506559371948
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,32,32,128,1,float16,float16,0,0.2020639975865682
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,32,8,128,1,float16,fp8,0,0.3517386515935262
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,32,8,128,1,fp8,fp8,0,0.37801599502563477
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,32,32,128,1,float16,fp8,0,0.19883199532826742
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,32,32,128,1,fp8,fp8,0,0.21504000822703043
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,32,1,128,1,fp8,fp8,0,0.19048533837000528
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,32,2,128,1,float16,float16,0,0.17986132701237997
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,32,2,128,1,fp8,fp8,0,0.1950826644897461
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,32,2,128,1,float16,fp8,0,0.18056533734003702
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,32,4,128,1,float16,float16,0,0.18227734168370566
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,32,4,128,1,float16,fp8,0,0.18125333388646445
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,32,4,128,1,fp8,fp8,0,0.19694934288660684
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,32,8,128,1,float16,float16,0,0.1853440006573995
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,32,8,128,1,float16,fp8,0,0.18465065956115723
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,32,8,128,1,fp8,fp8,0,0.20036800702412924
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,32,1,128,1,float16,float16,0,0.09899200002352397
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,32,1,128,1,float16,fp8,0,0.09693866968154907
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,32,32,128,1,float16,float16,0,0.1129813293615977
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,32,1,128,1,fp8,fp8,0,0.10240532954533894
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,32,32,128,1,float16,fp8,0,0.11059199770291646
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,32,2,128,1,float16,float16,0,0.09829866886138916
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,32,2,128,1,float16,fp8,0,0.0993280013402303
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,32,32,128,1,fp8,fp8,0,0.11947199702262878
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,32,2,128,1,fp8,fp8,0,0.10308266679445903
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,32,4,128,1,float16,float16,0,0.09830400347709656
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,32,4,128,1,float16,fp8,0,0.10037333766619365
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,32,4,128,1,fp8,fp8,0,0.10513599713643391
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,32,8,128,1,fp8,fp8,0,0.10990933577219646
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,32,8,128,1,float16,fp8,0,0.10103999574979146
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,32,8,128,1,float16,float16,0,0.10035733381907146
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,32,32,128,1,float16,float16,0,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,32,1,128,1,float16,float16,0,0.0580213318268458
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,32,1,128,1,fp8,fp8,0,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,32,32,128,1,float16,fp8,0,0.06211733321348826
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,32,1,128,1,float16,fp8,0,0.05836800237496694
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,32,2,128,1,float16,fp8,0,0.058362667759259544
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,32,32,128,1,fp8,fp8,0,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,32,2,128,1,float16,float16,0,0.057349334160486855
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,32,4,128,1,float16,float16,0,0.05836800237496694
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,32,4,128,1,float16,fp8,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,32,2,128,1,fp8,fp8,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,32,4,128,1,fp8,fp8,0,0.06043733159701029
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,32,8,128,1,float16,fp8,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,32,8,128,1,fp8,fp8,0,0.060415998101234436
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,32,8,128,1,float16,float16,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,32,32,128,1,float16,fp8,0,0.03721066564321518
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,32,32,128,1,fp8,fp8,0,0.03788800040880839
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,32,32,128,1,float16,float16,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,32,1,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,32,2,128,1,float16,float16,0,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,32,1,128,1,float16,fp8,0,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,32,1,128,1,float16,float16,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,32,2,128,1,float16,fp8,0,0.03547733277082443
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,32,2,128,1,fp8,fp8,0,0.036533333361148834
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,32,4,128,1,float16,float16,0,0.036490666369597115
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,32,4,128,1,float16,fp8,0,0.03549333413441976
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,32,4,128,1,fp8,fp8,0,0.03547733277082443
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,32,8,128,1,float16,float16,0,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,32,32,128,1,float16,float16,0,0.02731200059254964
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,32,32,128,1,float16,fp8,0,0.030042665700117748
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,32,1,128,1,float16,float16,0,0.02867199977238973
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,32,32,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,32,8,128,1,fp8,fp8,0,0.03721600025892258
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,32,1,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,32,1,128,1,fp8,fp8,0,0.029045333464940388
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,32,2,128,1,float16,float16,0,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,32,2,128,1,fp8,fp8,0,0.027994667490323383
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,32,8,128,1,float16,fp8,0,0.036864000062147774
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,32,2,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,32,4,128,1,float16,float16,0,0.028309332827727
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,32,4,128,1,fp8,fp8,0,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,32,4,128,1,float16,fp8,0,0.029018667836983997
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,32,8,128,1,float16,float16,0,0.029696000119050343
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,32,8,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,32,8,128,1,fp8,fp8,0,0.027984000742435455
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,32,1,128,1,fp8,fp8,0,1.0180319945017497
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,32,1,128,1,float16,fp8,0,0.949722687403361
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,32,1,128,1,float16,float16,0,0.9514719645182291
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,32,2,128,1,float16,float16,0,0.9654613335927328
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,32,2,128,1,float16,fp8,0,0.9688693682352701
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,32,2,128,1,fp8,fp8,0,1.0637599627176921
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,32,4,128,1,float16,fp8,0,0.9780906836191813
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,32,4,128,1,float16,float16,0,0.9859413305918375
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,32,1,128,1,float16,float16,0,0.48930132389068604
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,32,4,128,1,fp8,fp8,0,1.078447977701823
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,32,1,128,1,float16,fp8,0,0.4869120121002197
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,32,8,128,1,float16,float16,0,1.0153013070424397
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,32,8,128,1,float16,fp8,0,1.0054293473561604
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,32,8,128,1,fp8,fp8,0,1.1960373719533284
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,32,32,128,1,float16,float16,0,0.5768533150355021
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,32,32,128,1,float16,fp8,0,0.5563733180363973
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,32,32,128,1,fp8,fp8,0,0.6174506743748983
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,32,1,128,1,fp8,fp8,0,0.5183146794637045
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,32,2,128,1,float16,fp8,0,0.4961173137029012
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,32,2,128,1,float16,float16,0,0.49509867032368976
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,32,2,128,1,fp8,fp8,0,0.5454399983088175
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,32,4,128,1,float16,float16,0,0.5009066661198934
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,32,4,128,1,fp8,fp8,0,0.5488640069961548
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,32,4,128,1,float16,fp8,0,0.5019359985987345
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,32,8,128,1,float16,float16,0,0.5179733435312907
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,32,8,128,1,float16,fp8,0,0.5155893166859945
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,32,1,128,1,float16,float16,0,0.25737067063649494
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,32,1,128,1,float16,fp8,0,0.25975465774536133
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,32,1,128,1,fp8,fp8,0,0.27153066794077557
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,32,8,128,1,fp8,fp8,0,0.5939199924468994
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,32,32,128,1,float16,float16,0,0.30293333530426025
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,32,2,128,1,float16,float16,0,0.25941334168116253
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,32,32,128,1,float16,fp8,0,0.2961066762606303
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,32,2,128,1,float16,fp8,0,0.25597866376241046
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,32,2,128,1,fp8,fp8,0,0.2797173261642456
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,32,32,128,1,fp8,fp8,0,0.3169493277867635
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,32,4,128,1,float16,float16,0,0.2606079975763957
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,32,4,128,1,float16,fp8,0,0.26368532578150433
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,32,4,128,1,fp8,fp8,0,0.28006933132807416
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,32,8,128,1,float16,float16,0,0.27322133382161456
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,32,8,128,1,float16,fp8,0,0.2715253432591756
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,32,32,128,1,float16,float16,0,0.16622933745384216
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,32,8,128,1,fp8,fp8,0,0.28758400678634644
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,32,1,128,1,float16,float16,0,0.13892799615859985
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,32,32,128,1,float16,fp8,0,0.1627946694691976
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,32,32,128,1,fp8,fp8,0,0.17016534010569254
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,32,1,128,1,float16,fp8,0,0.1388159990310669
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,32,1,128,1,fp8,fp8,0,0.1455839971701304
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,32,2,128,1,float16,float16,0,0.14148267110188803
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,32,2,128,1,float16,fp8,0,0.13994133472442627
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,32,4,128,1,float16,float16,0,0.14250666896502176
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,32,2,128,1,fp8,fp8,0,0.14779733618100485
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,32,4,128,1,float16,fp8,0,0.14131200313568115
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,32,4,128,1,fp8,fp8,0,0.14916266997655234
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,32,8,128,1,float16,float16,0,0.14525866508483887
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,32,8,128,1,float16,fp8,0,0.1431893308957418
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,32,1,128,1,float16,float16,0,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,32,1,128,1,float16,fp8,0,0.07646400233109792
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,32,2,128,1,float16,float16,0,0.07679999868075053
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,32,1,128,1,fp8,fp8,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,32,8,128,1,fp8,fp8,0,0.1532533367474874
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,32,32,128,1,float16,fp8,0,0.09216533104578654
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,32,32,128,1,float16,float16,0,0.09351999560991923
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,32,32,128,1,fp8,fp8,0,0.09591466188430786
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,32,2,128,1,float16,fp8,0,0.07747733096281688
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,32,2,128,1,fp8,fp8,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,32,4,128,1,float16,float16,0,0.07850666840871175
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,32,4,128,1,float16,fp8,0,0.07852800190448761
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,32,8,128,1,float16,float16,0,0.08124266564846039
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,32,4,128,1,fp8,fp8,0,0.07987200220425923
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,32,8,128,1,float16,fp8,0,0.08192533254623413
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,32,8,128,1,fp8,fp8,0,0.08669333656628926
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,32,32,128,1,float16,float16,0,0.05222400029500326
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,32,32,128,1,float16,fp8,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,32,1,128,1,float16,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,32,32,128,1,fp8,fp8,0,0.05598400036493937
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,32,1,128,1,fp8,fp8,0,0.04779199759165446
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,32,1,128,1,float16,float16,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,32,2,128,1,float16,fp8,0,0.04776533444722494
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,32,2,128,1,float16,float16,0,0.04844266672929128
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,32,2,128,1,fp8,fp8,0,0.047450666626294456
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,32,4,128,1,float16,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,32,4,128,1,float16,float16,0,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,32,4,128,1,fp8,fp8,0,0.048810665806134544
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,32,8,128,1,float16,float16,0,0.04846400022506714
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,32,8,128,1,float16,fp8,0,0.047781333327293396
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,32,8,128,1,fp8,fp8,0,0.048810665806134544
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,32,32,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,32,1,128,1,float16,float16,0,0.0306986669699351
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,32,32,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,32,1,128,1,float16,fp8,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,32,32,128,1,fp8,fp8,0,0.03345600018898646
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,32,1,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,32,2,128,1,fp8,fp8,0,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,32,4,128,1,fp8,fp8,0,0.031045332551002502
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,32,4,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,32,2,128,1,float16,fp8,0,0.029685333371162415
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,32,8,128,1,float16,float16,0,0.03107200066248576
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,32,2,128,1,float16,float16,0,0.031397332747777305
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,32,4,128,1,float16,fp8,0,0.03140799949566523
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,32,32,128,1,float16,float16,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,32,1,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,32,8,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,32,32,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,32,8,128,1,float16,fp8,0,0.032085334261258446
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,32,1,128,1,fp8,fp8,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,32,1,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,32,32,128,1,fp8,fp8,0,0.02628266563018163
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,32,2,128,1,float16,float16,0,0.024890666206677754
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,32,2,128,1,float16,fp8,0,0.02489600082238515
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,32,4,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,32,4,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,32,2,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,32,4,128,1,float16,fp8,0,0.025263999899228413
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,32,8,128,1,float16,fp8,0,0.02697066714366277
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,32,8,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,32,8,128,1,fp8,fp8,0,0.025248001019159954
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,32,32,128,1,float16,float16,0,0.021146667500336964
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,32,32,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,32,32,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,32,1,128,1,fp8,fp8,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,32,2,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,32,1,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,32,2,128,1,float16,fp8,0,0.022885332504908245
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,32,1,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,32,2,128,1,fp8,fp8,0,0.021141332884629566
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,32,4,128,1,float16,float16,0,0.021168000996112823
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,32,4,128,1,float16,fp8,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,32,4,128,1,fp8,fp8,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,32,8,128,1,float16,float16,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,32,8,128,1,fp8,fp8,0,0.021151999632517498
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,32,8,128,1,float16,fp8,0,0.021157334248224895
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,32,1,128,1,float16,fp8,0,0.5166079998016357
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,32,1,128,1,float16,float16,0,0.5176639954249064
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,32,1,128,1,fp8,fp8,0,0.544426679611206
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,32,2,128,1,float16,float16,0,0.5304373502731323
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,32,2,128,1,float16,fp8,0,0.5232693354288737
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,32,4,128,1,float16,float16,0,0.5317973295847574
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,32,2,128,1,fp8,fp8,0,0.5789066553115845
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,32,4,128,1,float16,fp8,0,0.5328266620635986
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,32,1,128,1,float16,float16,0,0.2701759934425354
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,32,4,128,1,fp8,fp8,0,0.5727680126825968
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,32,8,128,1,float16,float16,0,0.5512479941050211
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,32,8,128,1,float16,fp8,0,0.5468213160832723
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,32,32,128,1,float16,fp8,0,0.3056640028953552
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,32,8,128,1,fp8,fp8,0,0.6157600084940592
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,32,32,128,1,float16,float16,0,0.3141973416010539
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,32,1,128,1,float16,fp8,0,0.2691359917322795
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,32,32,128,1,fp8,fp8,0,0.3319466710090637
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,32,1,128,1,fp8,fp8,0,0.2783626715342204
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,32,2,128,1,float16,float16,0,0.26949866612752277
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,32,2,128,1,float16,fp8,0,0.2708746592203776
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,32,2,128,1,fp8,fp8,0,0.2868853410085042
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,32,4,128,1,float16,fp8,0,0.2752853234608968
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,32,4,128,1,float16,float16,0,0.27357866366704303
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,32,4,128,1,fp8,fp8,0,0.29064534107844037
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,32,8,128,1,float16,float16,0,0.2841759920120239
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,32,8,128,1,float16,fp8,0,0.28278932968775433
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,32,1,128,1,float16,float16,0,0.14317867159843445
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,32,1,128,1,float16,fp8,0,0.14387200276056925
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,32,1,128,1,fp8,fp8,0,0.14863999684651694
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,32,2,128,1,float16,float16,0,0.144378662109375
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,32,8,128,1,fp8,fp8,0,0.29917333523432416
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,32,32,128,1,float16,float16,0,0.1704960068066915
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,32,32,128,1,float16,fp8,0,0.16486400365829468
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,32,32,128,1,fp8,fp8,0,0.17527467012405396
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,32,2,128,1,float16,fp8,0,0.1460906664530436
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,32,2,128,1,fp8,fp8,0,0.1520639955997467
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,32,4,128,1,float16,fp8,0,0.14711466431617737
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,32,4,128,1,float16,float16,0,0.14421866337458292
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,32,4,128,1,fp8,fp8,0,0.15479466319084167
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,32,8,128,1,float16,float16,0,0.1525759994983673
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,32,8,128,1,float16,fp8,0,0.14882133404413858
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,32,1,128,1,float16,float16,0,0.07851199805736542
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,32,8,128,1,fp8,fp8,0,0.1585493286450704
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,32,1,128,1,float16,fp8,0,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,32,1,128,1,fp8,fp8,0,0.08090133468310039
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,32,32,128,1,float16,fp8,0,0.09079466263453166
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,32,32,128,1,float16,float16,0,0.09454933802286784
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,32,2,128,1,float16,float16,0,0.07817066709200542
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,32,32,128,1,fp8,fp8,0,0.09727999567985535
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,32,2,128,1,float16,fp8,0,0.07918933530648549
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,32,2,128,1,fp8,fp8,0,0.08123200138409932
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,32,4,128,1,float16,fp8,0,0.07918933530648549
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,32,4,128,1,float16,float16,0,0.07920533418655396
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,32,4,128,1,fp8,fp8,0,0.08260799944400787
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,32,8,128,1,float16,float16,0,0.0839573343594869
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,32,8,128,1,float16,fp8,0,0.08260266482830048
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,32,8,128,1,fp8,fp8,0,0.08772266904513042
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,32,1,128,1,float16,fp8,0,0.04948799808820089
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,32,1,128,1,float16,float16,0,0.0481279989083608
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,32,2,128,1,float16,float16,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,32,1,128,1,fp8,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,32,32,128,1,float16,float16,0,0.054272000988324486
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,32,2,128,1,float16,fp8,0,0.04882133503754934
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,32,32,128,1,float16,fp8,0,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,32,32,128,1,fp8,fp8,0,0.0576800008614858
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,32,4,128,1,float16,fp8,0,0.04880533119042715
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,32,2,128,1,fp8,fp8,0,0.05020266771316528
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,32,4,128,1,fp8,fp8,0,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,32,4,128,1,float16,float16,0,0.04948799808820089
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,32,8,128,1,float16,fp8,0,0.04984533290068308
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,32,32,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,32,8,128,1,float16,float16,0,0.049839998284975685
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,32,8,128,1,fp8,fp8,0,0.049829334020614624
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,32,2,128,1,float16,float16,0,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,32,1,128,1,fp8,fp8,0,0.03143466760714849
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,32,1,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,32,32,128,1,float16,fp8,0,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,32,2,128,1,fp8,fp8,0,0.032399999598662056
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,32,1,128,1,float16,fp8,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,32,2,128,1,float16,fp8,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,32,32,128,1,fp8,fp8,0,0.03344533344109853
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,32,4,128,1,float16,float16,0,0.03140799949566523
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,32,4,128,1,float16,fp8,0,0.03209066639343897
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,32,8,128,1,float16,float16,0,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,32,8,128,1,float16,fp8,0,0.03209066639343897
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,32,4,128,1,fp8,fp8,0,0.03140799949566523
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,32,32,128,1,float16,float16,0,0.022522665560245514
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,32,8,128,1,fp8,fp8,0,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,32,32,128,1,float16,fp8,0,0.0229120006163915
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,32,1,128,1,float16,float16,0,0.021509334444999695
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,32,32,128,1,fp8,fp8,0,0.02317333221435547
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,32,1,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,32,1,128,1,fp8,fp8,0,0.022842665513356526
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,32,2,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,32,2,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,32,4,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,32,2,128,1,fp8,fp8,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,32,4,128,1,fp8,fp8,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,32,4,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,32,8,128,1,float16,float16,0,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,32,8,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,32,8,128,1,fp8,fp8,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,32,32,128,1,float16,float16,0,0.018757333358128864
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,32,32,128,1,float16,fp8,0,0.01945066700379054
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,32,1,128,1,float16,float16,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,32,1,128,1,fp8,fp8,0,0.01878400022784869
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,32,2,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,32,32,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,32,1,128,1,float16,fp8,0,0.019130667050679524
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,32,4,128,1,float16,fp8,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,32,4,128,1,float16,float16,0,0.018751999984184902
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,32,4,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,32,8,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,32,2,128,1,fp8,fp8,0,0.018805333723624546
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,32,2,128,1,float16,fp8,0,0.01878400022784869
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,32,8,128,1,float16,fp8,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,32,8,128,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,32,32,128,1,float16,float16,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,32,32,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,32,32,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,32,1,128,1,float16,float16,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,32,2,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,32,1,128,1,float16,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,32,2,128,1,float16,fp8,0,0.018794666975736618
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,32,1,128,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,32,2,128,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,32,4,128,1,float16,float16,0,0.018432000031073887
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,32,4,128,1,float16,fp8,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,32,8,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,32,4,128,1,fp8,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,32,8,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,32,8,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,32,1,128,1,float16,float16,0,0.3524213234583537
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,32,1,128,1,float16,fp8,0,0.35276798407236737
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,32,1,128,1,fp8,fp8,0,0.38997332255045575
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,32,2,128,1,float16,float16,0,0.3561760187149048
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,32,2,128,1,float16,fp8,0,0.35516266028086346
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,32,4,128,1,float16,float16,0,0.3561866680781047
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,32,4,128,1,float16,fp8,0,0.3606239954630534
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,32,2,128,1,fp8,fp8,0,0.39529065291086835
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,32,1,128,1,float16,float16,0,0.18517865737279257
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,32,4,128,1,fp8,fp8,0,0.40516265233357746
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,32,8,128,1,float16,fp8,0,0.3671040137608846
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,32,8,128,1,float16,float16,0,0.3712000052134196
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,32,8,128,1,fp8,fp8,0,0.40482131640116376
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,32,32,128,1,float16,float16,0,0.21029865741729736
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,32,32,128,1,float16,fp8,0,0.20496533314387003
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,32,32,128,1,fp8,fp8,0,0.23073599735895792
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,32,1,128,1,fp8,fp8,0,0.2053119937578837
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,32,1,128,1,float16,fp8,0,0.1855093240737915
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,32,2,128,1,float16,float16,0,0.18703466653823853
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,32,2,128,1,float16,fp8,0,0.1868799924850464
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,32,2,128,1,fp8,fp8,0,0.20599466562271118
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,32,4,128,1,float16,float16,0,0.1890986760457357
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,32,4,128,1,float16,fp8,0,0.18637333313624063
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,32,4,128,1,fp8,fp8,0,0.2087200085322062
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,32,8,128,1,float16,float16,0,0.19473600387573242
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,32,1,128,1,float16,float16,0,0.10035199920336406
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,32,1,128,1,float16,fp8,0,0.1013759970664978
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,32,8,128,1,fp8,fp8,0,0.2112906575202942
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,32,32,128,1,float16,float16,0,0.11504000425338745
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,32,8,128,1,float16,fp8,0,0.1904639999071757
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,32,32,128,1,fp8,fp8,0,0.1249066690603892
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,32,32,128,1,float16,fp8,0,0.11264000336329143
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,32,1,128,1,fp8,fp8,0,0.10717866818110149
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,32,2,128,1,float16,fp8,0,0.10170132915178935
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,32,2,128,1,float16,float16,0,0.09966400265693665
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,32,4,128,1,fp8,fp8,0,0.10990400115648906
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,32,2,128,1,fp8,fp8,0,0.10684266686439514
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,32,4,128,1,float16,float16,0,0.1013759970664978
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,32,8,128,1,float16,float16,0,0.10274133086204529
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,32,4,128,1,float16,fp8,0,0.10171199838320415
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,32,8,128,1,fp8,fp8,0,0.11332266529401143
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,32,1,128,1,float16,float16,0,0.060421332716941833
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,32,32,128,1,float16,float16,0,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,32,32,128,1,float16,fp8,0,0.062463998794555664
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,32,32,128,1,fp8,fp8,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,32,8,128,1,float16,fp8,0,0.10478933652242024
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,32,1,128,1,float16,fp8,0,0.060080001751581825
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,32,2,128,1,float16,fp8,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,32,2,128,1,float16,float16,0,0.059392000238100685
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,32,2,128,1,fp8,fp8,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,32,4,128,1,float16,float16,0,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,32,1,128,1,fp8,fp8,0,0.06348266700903575
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,32,4,128,1,float16,fp8,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,32,8,128,1,fp8,fp8,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,32,1,128,1,float16,float16,0,0.03822933385769526
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,32,8,128,1,float16,fp8,0,0.06109866499900818
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,32,32,128,1,float16,float16,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,32,32,128,1,float16,fp8,0,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,32,8,128,1,float16,float16,0,0.06109866499900818
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,32,4,128,1,fp8,fp8,0,0.06382399797439575
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,32,1,128,1,fp8,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,32,1,128,1,float16,fp8,0,0.03719999889532725
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,32,2,128,1,float16,fp8,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,32,2,128,1,float16,float16,0,0.039247999588648476
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,32,2,128,1,fp8,fp8,0,0.038912000755469
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,32,32,128,1,fp8,fp8,0,0.04199466605981191
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,32,4,128,1,float16,float16,0,0.038912000755469
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,32,4,128,1,fp8,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,32,8,128,1,float16,float16,0,0.038912000755469
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,32,8,128,1,float16,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,32,32,128,1,float16,float16,0,0.025263999899228413
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,32,8,128,1,fp8,fp8,0,0.039264000952243805
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,32,1,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,32,4,128,1,float16,fp8,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,32,1,128,1,float16,fp8,0,0.025594666600227356
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,32,1,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,32,32,128,1,fp8,fp8,0,0.027290667096773785
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,32,2,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,32,2,128,1,float16,fp8,0,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,32,2,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,32,4,128,1,float16,fp8,0,0.025242666403452556
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,32,32,128,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,32,8,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,32,4,128,1,fp8,fp8,0,0.0269813338915507
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,32,8,128,1,float16,float16,0,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,32,8,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,32,32,128,1,float16,float16,0,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,32,32,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,32,1,128,1,float16,float16,0,0.01740266631046931
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,32,4,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,32,1,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,32,2,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,32,2,128,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,32,2,128,1,float16,fp8,0,0.01878400022784869
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,32,1,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,32,4,128,1,float16,float16,0,0.018757333358128864
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,32,32,128,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,32,4,128,1,fp8,fp8,0,0.018746666610240936
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,32,8,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,32,8,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,32,8,128,1,fp8,fp8,0,0.019797333826621372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,32,32,128,1,float16,fp8,0,0.017743999759356182
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,32,32,128,1,float16,float16,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,32,1,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,32,1,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,32,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,32,2,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,32,4,128,1,float16,fp8,0,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,32,2,128,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,32,4,128,1,float16,float16,0,0.017418666432301205
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,32,4,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,32,4,128,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,32,2,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,32,8,128,1,float16,float16,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,32,32,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,32,8,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,32,32,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,32,32,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,32,32,128,1,fp8,fp8,0,0.016735999534527462
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,32,1,128,1,float16,float16,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,32,2,128,1,float16,float16,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,32,8,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,32,2,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,32,2,128,1,fp8,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,32,4,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,32,4,128,1,float16,float16,0,0.016741332908471424
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,32,4,128,1,fp8,fp8,0,0.016741332908471424
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,32,1,128,1,float16,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,32,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,32,8,128,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,32,8,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,32,8,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,32,1,128,1,float16,float16,0,0.28145066897074383
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,32,1,128,1,float16,fp8,0,0.28381333748499554
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,32,1,128,1,fp8,fp8,0,0.3152373234430949
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,32,2,128,1,float16,float16,0,0.2841599980990092
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,32,4,128,1,float16,float16,0,0.2834720015525818
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,32,2,128,1,fp8,fp8,0,0.31760533650716144
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,32,2,128,1,float16,fp8,0,0.28142933050791424
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,32,4,128,1,float16,fp8,0,0.2851840058962504
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,32,4,128,1,fp8,fp8,0,0.3186346689860026
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,32,8,128,1,float16,float16,0,0.29098665714263916
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,32,8,128,1,float16,fp8,0,0.2872053384780884
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,32,1,128,1,float16,float16,0,0.14830933014551798
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,32,32,128,1,float16,float16,0,0.16247466206550598
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,32,32,128,1,float16,fp8,0,0.1585546632607778
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,32,1,128,1,float16,fp8,0,0.14899200201034546
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,32,2,128,1,float16,float16,0,0.14847999811172485
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,32,8,128,1,fp8,fp8,0,0.3227360049883525
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,32,1,128,1,fp8,fp8,0,0.16434666514396667
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,32,32,128,1,fp8,fp8,0,0.1800533334414164
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,32,2,128,1,float16,fp8,0,0.14865600069363913
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,32,2,128,1,fp8,fp8,0,0.16383467117945352
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,32,4,128,1,float16,fp8,0,0.14830933014551798
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,32,4,128,1,float16,float16,0,0.1493333379427592
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,32,4,128,1,fp8,fp8,0,0.16454933087031046
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,32,8,128,1,float16,fp8,0,0.15360533197720846
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,32,32,128,1,float16,float16,0,0.08942932883898418
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,32,1,128,1,float16,float16,0,0.08294400076071422
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,32,1,128,1,float16,fp8,0,0.08328533172607422
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,32,8,128,1,float16,float16,0,0.15342400471369425
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,32,8,128,1,fp8,fp8,0,0.17083734273910522
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,32,32,128,1,fp8,fp8,0,0.09898666540781657
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,32,32,128,1,float16,fp8,0,0.08703999718030293
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,32,1,128,1,fp8,fp8,0,0.09079466263453166
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,32,2,128,1,fp8,fp8,0,0.08909866213798523
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,32,2,128,1,float16,fp8,0,0.08428800106048584
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,32,2,128,1,float16,float16,0,0.08430932958920796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,32,4,128,1,fp8,fp8,0,0.09076266487439473
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,32,4,128,1,float16,float16,0,0.0846506655216217
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,32,8,128,1,float16,float16,0,0.08431466420491536
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,32,8,128,1,fp8,fp8,0,0.09113599856694539
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,32,8,128,1,float16,fp8,0,0.08567466338475545
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,32,1,128,1,float16,float16,0,0.05017066498597463
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,32,32,128,1,float16,float16,0,0.05222400029500326
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,32,4,128,1,float16,fp8,0,0.0846506655216217
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,32,1,128,1,float16,fp8,0,0.050517335534095764
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,32,32,128,1,float16,fp8,0,0.05154666801293691
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,32,1,128,1,fp8,fp8,0,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,32,32,128,1,fp8,fp8,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,32,2,128,1,float16,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,32,4,128,1,float16,fp8,0,0.05120533208052317
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,32,2,128,1,fp8,fp8,0,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,32,2,128,1,float16,float16,0,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,32,4,128,1,fp8,fp8,0,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,32,8,128,1,float16,float16,0,0.050517335534095764
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,32,8,128,1,fp8,fp8,0,0.05393599967161814
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,32,32,128,1,float16,float16,0,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,32,1,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,32,8,128,1,float16,fp8,0,0.05017599960168203
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,32,32,128,1,float16,fp8,0,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,32,4,128,1,float16,float16,0,0.05120000243186951
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,32,32,128,1,fp8,fp8,0,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,32,1,128,1,float16,fp8,0,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,32,1,128,1,fp8,fp8,0,0.034485332667827606
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,32,2,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,32,2,128,1,float16,fp8,0,0.031727999448776245
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,32,4,128,1,float16,float16,0,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,32,2,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,32,4,128,1,fp8,fp8,0,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,32,8,128,1,float16,float16,0,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,32,8,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,32,8,128,1,fp8,fp8,0,0.0341333324710528
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,32,32,128,1,float16,float16,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,32,32,128,1,float16,fp8,0,0.023215999205907185
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,32,1,128,1,float16,float16,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,32,1,128,1,float16,fp8,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,32,1,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,32,4,128,1,float16,fp8,0,0.03242666771014532
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,32,32,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,32,2,128,1,float16,float16,0,0.021183999876181286
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,32,2,128,1,fp8,fp8,0,0.023205332458019257
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,32,2,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,32,4,128,1,float16,float16,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,32,4,128,1,float16,fp8,0,0.022885332504908245
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,32,4,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,32,8,128,1,float16,fp8,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,32,32,128,1,float16,float16,0,0.017077332983414333
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,32,1,128,1,float16,float16,0,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,32,8,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,32,32,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,32,32,128,1,float16,fp8,0,0.01876266673207283
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,32,1,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,32,8,128,1,float16,float16,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,32,1,128,1,fp8,fp8,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,32,2,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,32,2,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,32,4,128,1,float16,fp8,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,32,4,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,32,2,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,32,4,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,32,32,128,1,float16,float16,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,32,8,128,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,32,32,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,32,8,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,32,1,128,1,fp8,fp8,0,0.016389333953460056
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,32,8,128,1,float16,float16,0,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,32,2,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,32,32,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,32,1,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,32,2,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,32,1,128,1,float16,float16,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,32,2,128,1,fp8,fp8,0,0.017743999759356182
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,32,4,128,1,float16,float16,0,0.01603200038274129
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,32,4,128,1,fp8,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,32,8,128,1,float16,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,32,8,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,32,8,128,1,float16,float16,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,32,32,128,1,float16,float16,0,0.016714667280515034
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,32,32,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,32,32,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,32,1,128,1,float16,fp8,0,0.016714667280515034
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,32,1,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,32,1,128,1,fp8,fp8,0,0.015706667055686314
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,32,4,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,32,2,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,32,2,128,1,float16,float16,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,32,4,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,32,2,128,1,fp8,fp8,0,0.016735999534527462
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,32,4,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,32,4,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,32,8,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,32,8,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,32,8,128,1,float16,fp8,0,0.016384000579516094
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,32,1,128,1,float16,float16,0,0.2515626748402913
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,32,1,128,1,float16,fp8,0,0.2481493353843689
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,32,2,128,1,float16,float16,0,0.24679466088612875
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,32,1,128,1,fp8,fp8,0,0.2739199995994568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,32,2,128,1,float16,fp8,0,0.24507200717926025
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,32,2,128,1,fp8,fp8,0,0.2759733398755391
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,32,4,128,1,float16,float16,0,0.24815466006596884
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,32,4,128,1,fp8,fp8,0,0.279039998849233
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,32,4,128,1,float16,fp8,0,0.2481706738471985
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,32,8,128,1,float16,float16,0,0.2529279987017314
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,32,8,128,1,float16,fp8,0,0.2505333423614502
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,32,8,128,1,fp8,fp8,0,0.2807520031929016
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,32,32,128,1,float16,float16,0,0.13686933120091757
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,32,32,128,1,float16,fp8,0,0.13705066839853922
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,32,32,128,1,fp8,fp8,0,0.15497066577275595
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,32,1,128,1,float16,float16,0,0.13328533371289572
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,32,1,128,1,fp8,fp8,0,0.14506666858990988
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,32,2,128,1,float16,float16,0,0.13124266266822815
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,32,1,128,1,float16,fp8,0,0.13260799646377563
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,32,2,128,1,float16,fp8,0,0.13175466656684875
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,32,2,128,1,fp8,fp8,0,0.14574933052062988
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,32,4,128,1,float16,fp8,0,0.1327786644299825
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,32,4,128,1,float16,float16,0,0.13175466656684875
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,32,4,128,1,fp8,fp8,0,0.1460853318373362
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,32,8,128,1,float16,fp8,0,0.13176533579826355
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,32,8,128,1,float16,float16,0,0.1322719951470693
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,32,1,128,1,float16,fp8,0,0.0744053324063619
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,32,8,128,1,fp8,fp8,0,0.14677866299947104
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,32,32,128,1,float16,float16,0,0.07611733178297679
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,32,1,128,1,float16,float16,0,0.0751200020313263
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,32,1,128,1,fp8,fp8,0,0.08225599924723308
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,32,2,128,1,float16,float16,0,0.0747519979874293
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,32,32,128,1,fp8,fp8,0,0.08430932958920796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,32,32,128,1,float16,fp8,0,0.07577066620190938
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,32,2,128,1,float16,fp8,0,0.0747519979874293
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,32,4,128,1,fp8,fp8,0,0.08195733527342479
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,32,4,128,1,float16,float16,0,0.07407466570536296
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,32,8,128,1,float16,float16,0,0.0751039981842041
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,32,2,128,1,fp8,fp8,0,0.08157333234945933
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,32,4,128,1,float16,fp8,0,0.07577066620190938
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,32,8,128,1,float16,fp8,0,0.07441066702206929
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,32,8,128,1,fp8,fp8,0,0.08225599924723308
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,32,32,128,1,float16,float16,0,0.04744000236193339
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,32,32,128,1,float16,fp8,0,0.04539200166861216
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,32,1,128,1,float16,float16,0,0.04574400186538696
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,32,32,128,1,fp8,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,32,2,128,1,float16,float16,0,0.04505600035190582
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,32,2,128,1,fp8,fp8,0,0.048810665806134544
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,32,1,128,1,fp8,fp8,0,0.04915733138720194
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,32,2,128,1,float16,fp8,0,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,32,1,128,1,float16,fp8,0,0.04573333263397217
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,32,4,128,1,float16,float16,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,32,4,128,1,float16,fp8,0,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,32,4,128,1,fp8,fp8,0,0.049498667319615684
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,32,8,128,1,float16,float16,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,32,32,128,1,fp8,fp8,0,0.03276800115903219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,32,32,128,1,float16,float16,0,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,32,1,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,32,8,128,1,float16,fp8,0,0.04571733375390371
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,32,8,128,1,fp8,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,32,32,128,1,float16,fp8,0,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,32,1,128,1,float16,fp8,0,0.030031998952229817
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,32,1,128,1,fp8,fp8,0,0.03071466585000356
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,32,2,128,1,float16,float16,0,0.029701332251230877
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,32,2,128,1,fp8,fp8,0,0.030373332401116688
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,32,2,128,1,float16,fp8,0,0.029359998802344005
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,32,4,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,32,4,128,1,float16,fp8,0,0.029359998802344005
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,32,8,128,1,fp8,fp8,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,32,8,128,1,float16,float16,0,0.030400000512599945
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,32,32,128,1,float16,float16,0,0.022522665560245514
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,32,8,128,1,float16,fp8,0,0.030031998952229817
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,32,32,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,32,32,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,32,4,128,1,fp8,fp8,0,0.03140799949566523
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,32,1,128,1,float16,fp8,0,0.02184533327817917
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,32,2,128,1,float16,float16,0,0.02181333303451538
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,32,1,128,1,fp8,fp8,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,32,1,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,32,2,128,1,fp8,fp8,0,0.021157334248224895
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,32,2,128,1,float16,fp8,0,0.022181332111358643
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,32,4,128,1,float16,float16,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,32,4,128,1,fp8,fp8,0,0.021125334004561108
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,32,4,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,32,8,128,1,float16,float16,0,0.021509334444999695
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,32,8,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,32,32,128,1,float16,float16,0,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,32,32,128,1,fp8,fp8,0,0.018432000031073887
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,32,32,128,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,32,8,128,1,fp8,fp8,0,0.021503999829292297
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,32,1,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,32,1,128,1,fp8,fp8,0,0.01775466650724411
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,32,2,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,32,2,128,1,float16,fp8,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,32,1,128,1,float16,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,32,2,128,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,32,4,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,32,4,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,32,4,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,32,8,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,32,8,128,1,float16,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,32,8,128,1,fp8,fp8,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,32,1,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,32,32,128,1,float16,fp8,0,0.016693333784739178
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,32,32,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,32,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,32,32,128,1,float16,float16,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,32,2,128,1,float16,float16,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,32,1,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,32,2,128,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,32,2,128,1,float16,fp8,0,0.017749333133300144
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,32,4,128,1,fp8,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,32,4,128,1,float16,float16,0,0.01570133368174235
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,32,4,128,1,float16,fp8,0,0.016704000532627106
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,32,8,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,32,32,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,32,8,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,32,32,128,1,fp8,fp8,0,0.016037333756685257
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,32,32,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,32,1,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,32,1,128,1,float16,float16,0,0.014746667196353277
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,32,8,128,1,float16,float16,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,32,1,128,1,fp8,fp8,0,0.01670933390657107
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,32,2,128,1,float16,float16,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,32,2,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,32,4,128,1,fp8,fp8,0,0.01571200042963028
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,32,4,128,1,float16,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,32,4,128,1,float16,float16,0,0.016048000504573185
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,32,8,128,1,float16,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,32,2,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,32,8,128,1,fp8,fp8,0,0.016735999534527462
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,32,8,128,1,float16,float16,0,0.016714667280515034
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,24,1,128,1,float16,float16,0,11.827194213867188
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,24,1,128,1,fp8,fp8,0,9.087999979654947
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,24,1,128,1,float16,fp8,0,11.815252939860025
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,24,2,128,1,float16,fp8,0,11.823450724283854
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,24,2,128,1,fp8,fp8,0,9.080511728922525
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,24,2,128,1,float16,float16,0,11.877024332682291
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,24,4,128,1,float16,float16,0,11.413679758707682
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,24,4,128,1,float16,fp8,0,11.965802510579428
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,24,1,128,1,float16,float16,0,5.7159678141276045
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,24,4,128,1,fp8,fp8,0,9.14192008972168
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,24,1,128,1,float16,fp8,0,5.936986923217773
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,24,24,128,1,float16,float16,0,6.010213216145833
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,24,24,128,1,float16,fp8,0,5.863936106363933
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,24,24,128,1,fp8,fp8,0,4.803930600484212
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,24,8,128,1,float16,float16,0,11.786922454833984
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,24,8,128,1,fp8,fp8,0,9.154917399088541
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,24,8,128,1,float16,fp8,0,11.912879943847656
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,24,1,128,1,fp8,fp8,0,4.641274770100911
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,24,2,128,1,float16,float16,0,5.682517369588216
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,24,2,128,1,float16,fp8,0,5.810527801513672
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,24,2,128,1,fp8,fp8,0,4.655957221984863
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,24,4,128,1,float16,float16,0,5.745317459106445
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,24,4,128,1,float16,fp8,0,5.787482579549153
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,24,4,128,1,fp8,fp8,0,4.667562802632649
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,24,8,128,1,float16,float16,0,5.86735471089681
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,24,1,128,1,float16,float16,0,2.8231681187947593
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,24,1,128,1,float16,fp8,0,2.8439785639444985
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,24,8,128,1,float16,fp8,0,5.680122375488281
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,24,1,128,1,fp8,fp8,0,2.458144028981527
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,24,24,128,1,float16,float16,0,2.9284852345784507
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,24,8,128,1,fp8,fp8,0,4.68940798441569
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,24,24,128,1,float16,fp8,0,2.9076480865478516
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,24,2,128,1,float16,float16,0,2.831706682840983
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,24,2,128,1,fp8,fp8,0,2.4591360092163086
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,24,2,128,1,float16,fp8,0,2.9006614685058594
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,24,24,128,1,fp8,fp8,0,2.5308426221211753
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,24,4,128,1,float16,float16,0,2.861392021179199
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,24,4,128,1,float16,fp8,0,2.865503946940104
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,24,4,128,1,fp8,fp8,0,2.4642507235209146
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,24,8,128,1,float16,float16,0,2.8330666224161782
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,24,24,128,1,float16,float16,0,1.565008004506429
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,24,8,128,1,float16,fp8,0,2.84603214263916
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,24,24,128,1,float16,fp8,0,1.5822666486104329
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,24,8,128,1,fp8,fp8,0,2.477567990620931
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,24,1,128,1,float16,float16,0,1.545562744140625
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,24,1,128,1,float16,fp8,0,1.5360053380330403
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,24,1,128,1,fp8,fp8,0,1.3622612953186035
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,24,2,128,1,float16,float16,0,1.5329279899597168
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,24,24,128,1,fp8,fp8,0,1.4018826484680176
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,24,2,128,1,float16,fp8,0,1.5394026438395183
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,24,2,128,1,fp8,fp8,0,1.3598720232645671
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,24,4,128,1,float16,float16,0,1.5435093243916829
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,24,4,128,1,float16,fp8,0,1.5551199913024902
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,24,4,128,1,fp8,fp8,0,1.3663840293884277
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,24,8,128,1,float16,float16,0,1.5441919962565105
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,24,8,128,1,float16,fp8,0,1.5468907356262207
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,24,8,128,1,fp8,fp8,0,1.3759199778238933
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,24,1,128,1,float16,fp8,0,6.584485371907552
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,24,1,128,1,float16,float16,0,6.660613377888997
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,24,1,128,1,fp8,fp8,0,5.445290883382161
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,24,2,128,1,float16,float16,0,6.763519922892253
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,24,2,128,1,float16,fp8,0,6.909269332885742
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,24,2,128,1,fp8,fp8,0,5.46389897664388
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,24,4,128,1,float16,float16,0,6.536202748616536
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,24,4,128,1,float16,fp8,0,7.028736114501953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,24,1,128,1,float16,float16,0,3.270138740539551
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,24,4,128,1,fp8,fp8,0,5.473791758219401
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,24,8,128,1,float16,fp8,0,6.812677383422852
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,24,8,128,1,float16,float16,0,6.883850733439128
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,24,24,128,1,float16,float16,0,3.366570790608724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,24,1,128,1,float16,fp8,0,3.272197405497233
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,24,8,128,1,fp8,fp8,0,5.526869455973308
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,24,24,128,1,float16,fp8,0,3.388938585917155
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,24,1,128,1,fp8,fp8,0,2.8112265268961587
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,24,24,128,1,fp8,fp8,0,2.9460426966349282
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,24,2,128,1,float16,float16,0,3.2455520629882812
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,24,2,128,1,float16,fp8,0,3.2363627751668296
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,24,2,128,1,fp8,fp8,0,2.8214613596598306
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,24,4,128,1,float16,fp8,0,3.2969493865966797
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,24,4,128,1,float16,float16,0,3.3344958623250327
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,24,4,128,1,fp8,fp8,0,2.8320480982462564
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,24,8,128,1,float16,float16,0,3.276639938354492
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,24,8,128,1,float16,fp8,0,3.2663892110188804
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,24,1,128,1,float16,float16,0,1.7273173332214355
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,24,1,128,1,float16,fp8,0,1.704799969991048
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,24,8,128,1,fp8,fp8,0,2.8545761108398438
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,24,1,128,1,fp8,fp8,0,1.5076692899068196
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,24,24,128,1,float16,float16,0,1.7652053833007812
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,24,24,128,1,float16,fp8,0,1.7771520614624023
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,24,2,128,1,float16,float16,0,1.7058134078979492
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,24,2,128,1,float16,fp8,0,1.7344640096028645
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,24,2,128,1,fp8,fp8,0,1.514128049214681
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,24,24,128,1,fp8,fp8,0,1.569445292154948
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,24,4,128,1,float16,float16,0,1.7208107312520344
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,24,4,128,1,float16,fp8,0,1.7221867243448894
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,24,4,128,1,fp8,fp8,0,1.5148372650146484
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,24,8,128,1,float16,fp8,0,1.726293404897054
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,24,8,128,1,float16,float16,0,1.7245972951253254
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,24,1,128,1,float16,float16,0,0.9473653634389242
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,24,1,128,1,float16,fp8,0,0.9514613151550293
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,24,24,128,1,float16,float16,0,0.974677324295044
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,24,24,128,1,float16,fp8,0,0.9927679697672526
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,24,8,128,1,fp8,fp8,0,1.5319040616353352
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,24,1,128,1,fp8,fp8,0,0.8567413489023844
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,24,2,128,1,float16,float16,0,0.9507786432902018
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,24,24,128,1,fp8,fp8,0,0.8871093591054281
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,24,2,128,1,float16,fp8,0,0.9524906476338705
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,24,2,128,1,fp8,fp8,0,0.8591360251108805
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,24,4,128,1,float16,float16,0,0.9531733194986979
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,24,4,128,1,float16,fp8,0,0.964464028676351
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,24,4,128,1,fp8,fp8,0,0.8635733127593994
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,24,8,128,1,float16,float16,0,0.9559199810028076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,24,8,128,1,float16,fp8,0,0.9644373257954916
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,24,8,128,1,fp8,fp8,0,0.8673226833343506
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,24,1,128,1,float16,float16,0,4.573007901509603
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,24,1,128,1,float16,fp8,0,4.581199963887532
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,24,1,128,1,fp8,fp8,0,3.9778931935628257
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,24,2,128,1,float16,float16,0,4.795589447021484
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,24,2,128,1,fp8,fp8,0,3.987621307373047
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,24,2,128,1,float16,fp8,0,4.907349268595378
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,24,4,128,1,float16,float16,0,4.632239977518718
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,24,4,128,1,float16,fp8,0,4.880912144978841
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,24,1,128,1,float16,float16,0,2.357247988382975
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,24,4,128,1,fp8,fp8,0,3.9961652755737305
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,24,24,128,1,float16,float16,0,2.4420639673868814
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,24,24,128,1,float16,fp8,0,2.50163205464681
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,24,8,128,1,float16,float16,0,4.6447038650512695
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,24,8,128,1,float16,fp8,0,4.808885256449382
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,24,8,128,1,fp8,fp8,0,4.041898727416992
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,24,1,128,1,float16,fp8,0,2.359802722930908
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,24,1,128,1,fp8,fp8,0,2.0667786598205566
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,24,24,128,1,fp8,fp8,0,2.173269271850586
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,24,2,128,1,float16,float16,0,2.3466666539510093
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,24,2,128,1,float16,fp8,0,2.3888266881306968
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,24,2,128,1,fp8,fp8,0,2.0732266108194985
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,24,4,128,1,float16,float16,0,2.3611626625061035
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,24,4,128,1,float16,fp8,0,2.393258730570475
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,24,4,128,1,fp8,fp8,0,2.0842132568359375
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,24,8,128,1,float16,float16,0,2.385061264038086
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,24,1,128,1,float16,float16,0,1.2641279697418213
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,24,1,128,1,float16,fp8,0,1.2569599946339924
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,24,8,128,1,float16,fp8,0,2.384042739868164
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,24,24,128,1,float16,float16,0,1.3161760171254475
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,24,1,128,1,fp8,fp8,0,1.12009064356486
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,24,24,128,1,float16,fp8,0,1.3110613028208415
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,24,8,128,1,fp8,fp8,0,2.09988800684611
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,24,2,128,1,float16,float16,0,1.2668639818827312
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,24,2,128,1,float16,fp8,0,1.2610507011413574
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,24,24,128,1,fp8,fp8,0,1.1716266473134358
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,24,2,128,1,fp8,fp8,0,1.121455987294515
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,24,4,128,1,float16,float16,0,1.2757386366526287
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,24,4,128,1,float16,fp8,0,1.2665119965871174
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,24,4,128,1,fp8,fp8,0,1.1272532939910889
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,24,24,128,1,float16,float16,0,0.7294346491495768
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,24,8,128,1,float16,float16,0,1.2648106416066487
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,24,8,128,1,fp8,fp8,0,1.137168010075887
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,24,8,128,1,float16,fp8,0,1.2849546273549397
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,24,1,128,1,float16,float16,0,0.7055253187815348
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,24,24,128,1,float16,fp8,0,0.7376213073730469
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,24,1,128,1,float16,fp8,0,0.7079359690348307
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,24,1,128,1,fp8,fp8,0,0.6459733247756958
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,24,2,128,1,float16,fp8,0,0.7099733352661133
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,24,2,128,1,fp8,fp8,0,0.6463146607081095
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,24,2,128,1,float16,float16,0,0.7068959871927897
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,24,24,128,1,fp8,fp8,0,0.6720853646596273
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,24,4,128,1,float16,float16,0,0.7130506833394369
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,24,4,128,1,float16,fp8,0,0.7147573630015055
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,24,4,128,1,fp8,fp8,0,0.6483626763025919
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,24,8,128,1,float16,fp8,0,0.7212426662445068
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,24,8,128,1,float16,float16,0,0.7167999744415283
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,24,8,128,1,fp8,fp8,0,0.6533173322677612
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,24,1,128,1,float16,fp8,0,6.51246960957845
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,24,1,128,1,fp8,fp8,0,5.345450719197591
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,24,1,128,1,float16,float16,0,6.473722457885742
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,24,2,128,1,float16,fp8,0,6.618282953898112
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,24,2,128,1,float16,float16,0,6.319445292154948
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,24,2,128,1,fp8,fp8,0,5.370538711547852
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,24,4,128,1,float16,fp8,0,6.26858647664388
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,24,4,128,1,float16,float16,0,6.551045099894206
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,24,1,128,1,float16,float16,0,3.109370549519857
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,24,4,128,1,fp8,fp8,0,5.4084211985270185
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,24,24,128,1,float16,float16,0,3.2629706064860025
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,24,1,128,1,float16,fp8,0,3.0912853876749673
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,24,8,128,1,float16,float16,0,6.7225602467854815
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,24,24,128,1,float16,fp8,0,3.3744373321533203
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,24,8,128,1,float16,fp8,0,6.772389094034831
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,24,8,128,1,fp8,fp8,0,5.434709548950195
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,24,24,128,1,fp8,fp8,0,2.90389347076416
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,24,1,128,1,fp8,fp8,0,2.7359625498453775
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,24,2,128,1,float16,float16,0,3.1056267420450845
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,24,2,128,1,float16,fp8,0,3.099162737528483
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,24,2,128,1,fp8,fp8,0,2.7362985610961914
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,24,4,128,1,float16,float16,0,3.210591952006022
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,24,4,128,1,float16,fp8,0,3.113487879435221
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,24,4,128,1,fp8,fp8,0,2.7485812505086265
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,24,1,128,1,float16,float16,0,1.6023893356323242
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,24,8,128,1,float16,float16,0,3.2046079635620117
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,24,8,128,1,float16,fp8,0,3.2554718653361
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,24,1,128,1,float16,fp8,0,1.6109226544698079
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,24,24,128,1,float16,float16,0,1.682773272196452
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,24,24,128,1,float16,fp8,0,1.7054719924926758
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,24,1,128,1,fp8,fp8,0,1.4349600474039714
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,24,2,128,1,float16,float16,0,1.6279892921447754
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,24,8,128,1,fp8,fp8,0,2.779792149861654
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,24,2,128,1,float16,fp8,0,1.63754669825236
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,24,2,128,1,fp8,fp8,0,1.4349652926127117
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,24,24,128,1,fp8,fp8,0,1.5213227272033691
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,24,4,128,1,float16,fp8,0,1.625600020090739
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,24,4,128,1,float16,float16,0,1.6157013575236003
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,24,4,128,1,fp8,fp8,0,1.4457173347473145
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,24,24,128,1,float16,float16,0,0.9079466660817465
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,24,24,128,1,float16,fp8,0,0.9180160363515218
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,24,8,128,1,float16,float16,0,1.6460800170898438
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,24,8,128,1,fp8,fp8,0,1.4641493161519368
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,24,8,128,1,float16,fp8,0,1.6375519434611003
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,24,1,128,1,float16,float16,0,0.8686933517456055
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,24,1,128,1,float16,fp8,0,0.8686986764272054
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,24,1,128,1,fp8,fp8,0,0.7876266638437907
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,24,2,128,1,float16,float16,0,0.8724533716837565
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,24,2,128,1,float16,fp8,0,0.8792746861775717
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,24,2,128,1,fp8,fp8,0,0.7927520275115967
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,24,24,128,1,fp8,fp8,0,0.8275626500447592
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,24,4,128,1,float16,float16,0,0.8758613268534342
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,24,4,128,1,float16,fp8,0,0.8768853346506754
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,24,4,128,1,fp8,fp8,0,0.7947946389516195
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,24,1,128,1,float16,float16,0,0.49885865052541095
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,24,1,128,1,float16,fp8,0,0.5009119908014933
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,24,8,128,1,float16,float16,0,0.8820052941640218
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,24,8,128,1,float16,fp8,0,0.8877973556518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,24,8,128,1,fp8,fp8,0,0.7995733420054117
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,24,24,128,1,float16,float16,0,0.5242826541264852
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,24,24,128,1,float16,fp8,0,0.527018666267395
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,24,24,128,1,fp8,fp8,0,0.4828159809112549
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,24,1,128,1,fp8,fp8,0,0.46199464797973633
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,24,2,128,1,float16,float16,0,0.5015946626663208
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,24,2,128,1,fp8,fp8,0,0.4657546679178874
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,24,2,128,1,float16,fp8,0,0.5005653301874796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,24,4,128,1,fp8,fp8,0,0.4657546679178874
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,24,4,128,1,float16,float16,0,0.5060266653696696
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,24,4,128,1,float16,fp8,0,0.508410652478536
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,24,8,128,1,float16,float16,0,0.5087680021921793
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,24,8,128,1,float16,fp8,0,0.5125120083491007
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,24,8,128,1,fp8,fp8,0,0.4708746671676636
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,24,1,128,1,float16,fp8,0,3.7463038762410483
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,24,1,128,1,float16,float16,0,3.7544854482014975
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,24,1,128,1,fp8,fp8,0,3.3399572372436523
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,24,2,128,1,float16,float16,0,3.8855787913004556
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,24,2,128,1,float16,fp8,0,3.841541290283203
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,24,2,128,1,fp8,fp8,0,3.3547948201497397
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,24,4,128,1,float16,float16,0,3.77022393544515
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,24,4,128,1,float16,fp8,0,3.9058879216512046
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,24,1,128,1,float16,float16,0,1.9044747352600098
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,24,4,128,1,fp8,fp8,0,3.374256134033203
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,24,24,128,1,float16,float16,0,2.0616587003072104
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,24,1,128,1,float16,fp8,0,1.906501293182373
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,24,8,128,1,float16,float16,0,3.852128028869629
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,24,8,128,1,float16,fp8,0,3.929306666056315
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,24,24,128,1,float16,fp8,0,2.051722685496012
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,24,8,128,1,fp8,fp8,0,3.4232266743977866
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,24,24,128,1,fp8,fp8,0,1.8489972750345867
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,24,1,128,1,fp8,fp8,0,1.7184425989786785
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,24,2,128,1,float16,float16,0,1.94815460840861
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,24,2,128,1,fp8,fp8,0,1.7259519894917805
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,24,2,128,1,float16,fp8,0,1.9167572657267253
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,24,4,128,1,float16,float16,0,1.9351894060770671
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,24,4,128,1,float16,fp8,0,1.9327999750773113
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,24,4,128,1,fp8,fp8,0,1.7368799845377605
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,24,8,128,1,float16,float16,0,1.9461174011230469
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,24,8,128,1,float16,fp8,0,1.9570345878601074
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,24,1,128,1,float16,fp8,0,1.0149599711100261
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,24,1,128,1,float16,float16,0,1.0050666332244873
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,24,1,128,1,fp8,fp8,0,0.9159680207570394
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,24,24,128,1,float16,fp8,0,1.0739999612172444
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,24,24,128,1,float16,float16,0,1.0647892951965332
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,24,24,128,1,fp8,fp8,0,0.978111982345581
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,24,8,128,1,fp8,fp8,0,1.76418670018514
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,24,2,128,1,float16,fp8,0,1.0183680057525635
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,24,2,128,1,float16,float16,0,1.0088160037994385
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,24,2,128,1,fp8,fp8,0,0.9173333644866943
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,24,4,128,1,float16,float16,0,1.0122559865315754
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,24,4,128,1,float16,fp8,0,1.0132479667663574
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,24,4,128,1,fp8,fp8,0,0.9241440296173096
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,24,8,128,1,float16,float16,0,1.0245119730631511
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,24,24,128,1,float16,float16,0,0.5829973220825195
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,24,24,128,1,float16,fp8,0,0.5884533325831095
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,24,8,128,1,fp8,fp8,0,0.9323679606119791
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,24,8,128,1,float16,fp8,0,1.0422613620758057
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,24,1,128,1,float16,float16,0,0.5505706469217936
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,24,1,128,1,float16,fp8,0,0.553978681564331
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,24,1,128,1,fp8,fp8,0,0.5101066827774048
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,24,2,128,1,float16,float16,0,0.5550080140431722
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,24,24,128,1,fp8,fp8,0,0.5420373280843099
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,24,2,128,1,fp8,fp8,0,0.5135413408279419
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,24,2,128,1,float16,fp8,0,0.5566933155059814
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,24,4,128,1,float16,float16,0,0.5652426481246948
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,24,4,128,1,float16,fp8,0,0.5614933172861735
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,24,4,128,1,fp8,fp8,0,0.5152479807535807
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,24,8,128,1,float16,float16,0,0.5659306844075521
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,24,1,128,1,float16,float16,0,0.32921600341796875
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,24,1,128,1,float16,fp8,0,0.3295519948005676
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,24,24,128,1,float16,float16,0,0.3479893207550049
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,24,8,128,1,float16,fp8,0,0.5676373243331909
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,24,24,128,1,float16,fp8,0,0.35278932253519696
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,24,8,128,1,fp8,fp8,0,0.5215573310852051
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,24,24,128,1,fp8,fp8,0,0.32614399989446
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,24,1,128,1,fp8,fp8,0,0.3080480098724365
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,24,2,128,1,float16,float16,0,0.32922132809956867
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,24,2,128,1,float16,fp8,0,0.33159999052683514
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,24,2,128,1,fp8,fp8,0,0.30702932675679523
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,24,4,128,1,float16,float16,0,0.3333066701889038
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,24,4,128,1,float16,fp8,0,0.3346773386001587
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,24,4,128,1,fp8,fp8,0,0.3121440013249715
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,24,8,128,1,float16,float16,0,0.33740798632303876
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,24,8,128,1,float16,fp8,0,0.3394560019175212
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,24,8,128,1,fp8,fp8,0,0.31522132953008014
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,24,1,128,1,float16,float16,0,3.914581298828125
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,24,1,128,1,float16,fp8,0,3.906042734781901
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,24,1,128,1,fp8,fp8,0,3.5636908213297525
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,24,2,128,1,float16,fp8,0,3.9193598429361978
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,24,2,128,1,fp8,fp8,0,3.6157439549764
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,24,2,128,1,float16,float16,0,4.01203727722168
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,24,4,128,1,float16,float16,0,4.017840067545573
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,24,4,128,1,float16,fp8,0,4.026037216186523
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,24,1,128,1,float16,float16,0,1.9467946688334148
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,24,4,128,1,fp8,fp8,0,3.6249653498331704
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,24,1,128,1,float16,fp8,0,1.9471359252929688
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,24,8,128,1,float16,float16,0,4.060341199239095
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,24,24,128,1,float16,float16,0,2.1415252685546875
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,24,24,128,1,float16,fp8,0,2.12718931833903
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,24,8,128,1,float16,fp8,0,4.049413363138835
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,24,24,128,1,fp8,fp8,0,1.9756372769673665
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,24,8,128,1,fp8,fp8,0,3.6672798792521157
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,24,1,128,1,fp8,fp8,0,1.8099199930826824
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,24,2,128,1,float16,float16,0,1.9553227424621582
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,24,2,128,1,float16,fp8,0,1.9770026206970215
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,24,2,128,1,fp8,fp8,0,1.8252800305684407
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,24,4,128,1,float16,float16,0,1.9742827415466309
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,24,4,128,1,float16,fp8,0,1.97870937983195
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,24,4,128,1,fp8,fp8,0,1.830399990081787
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,24,8,128,1,float16,float16,0,1.9947519302368164
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,24,1,128,1,float16,float16,0,1.0142719745635986
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,24,1,128,1,float16,fp8,0,1.0149493217468262
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,24,8,128,1,float16,fp8,0,2.0090773900349936
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,24,1,128,1,fp8,fp8,0,0.942255973815918
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,24,8,128,1,fp8,fp8,0,1.8541280428568523
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,24,24,128,1,float16,float16,0,1.0968746344248455
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,24,2,128,1,float16,fp8,0,1.020074685414632
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,24,24,128,1,float16,fp8,0,1.1163252989451091
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,24,2,128,1,float16,float16,0,1.0186879634857178
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,24,2,128,1,fp8,fp8,0,0.9477120240529379
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,24,24,128,1,fp8,fp8,0,1.0251946449279785
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,24,4,128,1,float16,float16,0,1.021445353825887
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,24,4,128,1,float16,fp8,0,1.0231413046518962
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,24,4,128,1,fp8,fp8,0,0.9552266597747803
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,24,24,128,1,float16,float16,0,0.582314650217692
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,24,8,128,1,float16,float16,0,1.0388533274332683
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,24,8,128,1,float16,fp8,0,1.055578629175822
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,24,8,128,1,fp8,fp8,0,0.9654560089111328
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,24,24,128,1,float16,fp8,0,0.5881173213322958
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,24,1,128,1,float16,float16,0,0.5454613367716471
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,24,1,128,1,float16,fp8,0,0.5485226710637411
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,24,1,128,1,fp8,fp8,0,0.5108053286870321
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,24,2,128,1,float16,float16,0,0.547162652015686
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,24,2,128,1,fp8,fp8,0,0.5108053286870321
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,24,2,128,1,float16,fp8,0,0.5522773265838623
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,24,4,128,1,float16,fp8,0,0.5529653231302897
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,24,24,128,1,fp8,fp8,0,0.5512533187866211
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,24,4,128,1,float16,float16,0,0.552618662516276
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,24,4,128,1,fp8,fp8,0,0.5169546604156494
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,24,1,128,1,float16,float16,0,0.3094399968783061
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,24,8,128,1,float16,float16,0,0.5584213336308798
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,24,1,128,1,float16,fp8,0,0.3097760081291199
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,24,8,128,1,float16,fp8,0,0.5618133147557577
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,24,24,128,1,float16,float16,0,0.3333119948705037
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,24,8,128,1,fp8,fp8,0,0.5208853483200073
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,24,24,128,1,float16,fp8,0,0.3357013463973999
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,24,24,128,1,fp8,fp8,0,0.3141973416010539
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,24,1,128,1,fp8,fp8,0,0.2937013308207194
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,24,2,128,1,float16,float16,0,0.3104426662127177
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,24,2,128,1,float16,fp8,0,0.31248533725738525
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,24,2,128,1,fp8,fp8,0,0.2940586606661479
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,24,4,128,1,float16,float16,0,0.31590400139490765
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,24,4,128,1,fp8,fp8,0,0.29681599140167236
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,24,4,128,1,float16,fp8,0,0.31590400139490765
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,24,8,128,1,float16,float16,0,0.3186346689860026
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,24,8,128,1,float16,fp8,0,0.32204800844192505
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,24,8,128,1,fp8,fp8,0,0.3022560079892476
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,24,1,128,1,float16,float16,0,0.193722665309906
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,24,24,128,1,float16,float16,0,0.20548266172409058
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,24,24,128,1,float16,fp8,0,0.20821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,24,1,128,1,float16,fp8,0,0.1971199909845988
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,24,1,128,1,fp8,fp8,0,0.18346667289733887
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,24,24,128,1,fp8,fp8,0,0.19729065895080566
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,24,2,128,1,fp8,fp8,0,0.18346667289733887
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,24,2,128,1,float16,float16,0,0.19558932383855185
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,24,2,128,1,float16,fp8,0,0.1960960030555725
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,24,4,128,1,float16,fp8,0,0.19643733898798624
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,24,4,128,1,float16,float16,0,0.1949066718419393
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,24,4,128,1,fp8,fp8,0,0.1853440006573995
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,24,8,128,1,float16,float16,0,0.19711466630299887
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,24,8,128,1,float16,fp8,0,0.20002132654190063
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,24,8,128,1,fp8,fp8,0,0.1853440006573995
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,24,1,128,1,float16,fp8,0,2.531162738800049
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,24,1,128,1,fp8,fp8,0,2.3912107149759927
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,24,1,128,1,float16,float16,0,2.5175093015034995
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,24,2,128,1,float16,float16,0,2.542762597401937
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,24,2,128,1,float16,fp8,0,2.5287680625915527
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,24,2,128,1,fp8,fp8,0,2.400767962137858
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,24,4,128,1,float16,fp8,0,2.553002675374349
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,24,4,128,1,float16,float16,0,2.5799733797709146
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,24,1,128,1,float16,float16,0,1.3003093401590984
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,24,4,128,1,fp8,fp8,0,2.4284106890360513
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,24,1,128,1,float16,fp8,0,1.3016800085703533
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,24,24,128,1,float16,float16,0,1.389893372853597
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,24,8,128,1,float16,float16,0,2.588501294453939
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,24,24,128,1,float16,fp8,0,1.4039093653361003
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,24,8,128,1,float16,fp8,0,2.589183966318766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,24,8,128,1,fp8,fp8,0,2.466650644938151
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,24,24,128,1,fp8,fp8,0,1.3366559346516926
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,24,1,128,1,fp8,fp8,0,1.2195786635080974
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,24,2,128,1,fp8,fp8,0,1.222650686899821
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,24,2,128,1,float16,fp8,0,1.2914453347524006
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,24,2,128,1,float16,float16,0,1.3064586321512859
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,24,4,128,1,float16,float16,0,1.3016746838887532
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,24,4,128,1,fp8,fp8,0,1.2291359901428223
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,24,4,128,1,float16,fp8,0,1.301642656326294
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,24,1,128,1,float16,float16,0,0.6778879960378011
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,24,8,128,1,float16,fp8,0,1.3264266649882
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,24,8,128,1,float16,float16,0,1.3182186285654705
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,24,1,128,1,float16,fp8,0,0.6754986445109049
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,24,24,128,1,float16,float16,0,0.7260159651438395
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,24,24,128,1,float16,fp8,0,0.7433973153432211
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,24,1,128,1,fp8,fp8,0,0.6391466856002808
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,24,8,128,1,fp8,fp8,0,1.2526880105336506
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,24,2,128,1,float16,float16,0,0.6809546947479248
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,24,2,128,1,float16,fp8,0,0.680618683497111
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,24,24,128,1,fp8,fp8,0,0.7000586986541748
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,24,2,128,1,fp8,fp8,0,0.6411946614583334
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,24,4,128,1,float16,float16,0,0.681984027226766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,24,4,128,1,float16,fp8,0,0.6901760101318359
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,24,4,128,1,fp8,fp8,0,0.6493866840998331
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,24,8,128,1,float16,float16,0,0.690170685450236
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,24,24,128,1,float16,float16,0,0.39458131790161133
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,24,24,128,1,float16,fp8,0,0.40004265308380127
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,24,8,128,1,float16,fp8,0,0.6949546337127686
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,24,8,128,1,fp8,fp8,0,0.6567360162734985
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,24,1,128,1,float16,float16,0,0.36983466148376465
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,24,1,128,1,float16,fp8,0,0.3701759974161784
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,24,1,128,1,fp8,fp8,0,0.34867199261983234
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,24,2,128,1,float16,float16,0,0.3688160181045532
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,24,2,128,1,float16,fp8,0,0.3688266674677531
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,24,2,128,1,fp8,fp8,0,0.3520960013071696
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,24,4,128,1,float16,float16,0,0.37187735239664715
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,24,24,128,1,fp8,fp8,0,0.38075733184814453
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,24,4,128,1,float16,fp8,0,0.3752959966659546
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,24,4,128,1,fp8,fp8,0,0.3561813433965047
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,24,8,128,1,float16,float16,0,0.3787146806716919
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,24,24,128,1,float16,float16,0,0.2303946614265442
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,24,8,128,1,float16,fp8,0,0.3800746599833171
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,24,24,128,1,float16,fp8,0,0.23347733418146768
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,24,1,128,1,float16,float16,0,0.21232000986735025
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,24,1,128,1,float16,fp8,0,0.21094399690628052
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,24,8,128,1,fp8,fp8,0,0.35922666390736896
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,24,24,128,1,fp8,fp8,0,0.22118399540583292
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,24,1,128,1,fp8,fp8,0,0.20070399840672812
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,24,2,128,1,fp8,fp8,0,0.20206934213638306
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,24,2,128,1,float16,float16,0,0.21094399690628052
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,24,2,128,1,float16,fp8,0,0.21266667048136392
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,24,4,128,1,float16,fp8,0,0.21640533208847046
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,24,4,128,1,float16,float16,0,0.21574934323628744
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,24,4,128,1,fp8,fp8,0,0.20599466562271118
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,24,8,128,1,float16,float16,0,0.2198186715443929
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,24,8,128,1,float16,fp8,0,0.22085332870483398
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,24,1,128,1,float16,float16,0,0.1418239971001943
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,24,24,128,1,float16,float16,0,0.14622933665911356
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,24,8,128,1,fp8,fp8,0,0.2106026609738668
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,24,1,128,1,float16,fp8,0,0.14167466759681702
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,24,1,128,1,fp8,fp8,0,0.1327786644299825
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,24,24,128,1,float16,fp8,0,0.14677332838376364
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,24,24,128,1,fp8,fp8,0,0.14284800489743552
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,24,2,128,1,float16,float16,0,0.14079999923706055
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,24,2,128,1,fp8,fp8,0,0.13448533415794373
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,24,4,128,1,fp8,fp8,0,0.13380266229311624
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,24,4,128,1,float16,float16,0,0.13909332950909933
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,24,4,128,1,float16,fp8,0,0.1423360009988149
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,24,8,128,1,float16,fp8,0,0.14284800489743552
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,24,8,128,1,float16,float16,0,0.1397760013739268
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,24,8,128,1,fp8,fp8,0,0.13482667009035745
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,24,2,128,1,float16,fp8,0,0.1418453355630239
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,24,1,128,1,float16,float16,0,2.919589360555013
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,24,1,128,1,fp8,fp8,0,2.8156585693359375
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,24,1,128,1,float16,fp8,0,2.896725336710612
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,24,2,128,1,fp8,fp8,0,2.8388694127400718
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,24,2,128,1,float16,fp8,0,2.9264214833577475
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,24,2,128,1,float16,float16,0,2.910714785257975
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,24,4,128,1,float16,float16,0,2.9788106282552085
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,24,4,128,1,float16,fp8,0,3.0023625691731772
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,24,1,128,1,float16,float16,0,1.4762667020161946
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,24,4,128,1,fp8,fp8,0,2.8892319997151694
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,24,1,128,1,float16,fp8,0,1.4776320457458496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,24,24,128,1,float16,float16,0,1.594879945119222
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,24,8,128,1,float16,float16,0,2.9709653854370117
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,24,8,128,1,float16,fp8,0,2.975402514139811
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,24,24,128,1,float16,fp8,0,1.6037492752075195
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,24,24,128,1,fp8,fp8,0,1.5860053698221843
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,24,8,128,1,fp8,fp8,0,2.920618693033854
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,24,1,128,1,fp8,fp8,0,1.4243839581807454
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,24,2,128,1,float16,float16,0,1.4824105898539226
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,24,2,128,1,fp8,fp8,0,1.4329172770182292
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,24,2,128,1,float16,fp8,0,1.4752373695373535
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,24,4,128,1,float16,float16,0,1.4738772710164387
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,24,4,128,1,fp8,fp8,0,1.4484480222066243
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,24,4,128,1,float16,fp8,0,1.4759252866109211
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,24,8,128,1,float16,float16,0,1.4960586229960124
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,24,8,128,1,float16,fp8,0,1.5100639661153157
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,24,1,128,1,float16,float16,0,0.7540000279744467
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,24,1,128,1,float16,fp8,0,0.7489226659138998
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,24,1,128,1,fp8,fp8,0,0.7311360041300455
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,24,8,128,1,fp8,fp8,0,1.4871946970621746
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,24,24,128,1,float16,float16,0,0.8169866402943929
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,24,24,128,1,float16,fp8,0,0.8299520015716553
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,24,2,128,1,float16,float16,0,0.7577653725941976
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,24,2,128,1,float16,fp8,0,0.7577653725941976
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,24,2,128,1,fp8,fp8,0,0.7369386355082194
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,24,24,128,1,fp8,fp8,0,0.8115200201670328
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,24,4,128,1,float16,float16,0,0.7608373165130615
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,24,4,128,1,float16,fp8,0,0.7703786691029867
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,24,4,128,1,fp8,fp8,0,0.7417226632436117
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,24,8,128,1,float16,float16,0,0.7741386890411377
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,24,8,128,1,float16,fp8,0,0.7765333652496338
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,24,24,128,1,float16,float16,0,0.4312746524810791
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,24,8,128,1,fp8,fp8,0,0.7560533682505289
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,24,24,128,1,float16,fp8,0,0.4374399979909261
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,24,1,128,1,float16,float16,0,0.40004265308380127
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,24,1,128,1,float16,fp8,0,0.39662933349609375
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,24,1,128,1,fp8,fp8,0,0.38929065068562824
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,24,2,128,1,float16,float16,0,0.39935465653737384
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,24,2,128,1,fp8,fp8,0,0.3916800022125244
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,24,2,128,1,float16,fp8,0,0.3983360131581624
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,24,24,128,1,fp8,fp8,0,0.4288853406906128
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,24,4,128,1,float16,float16,0,0.4007253249486287
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,24,4,128,1,fp8,fp8,0,0.3945866823196411
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,24,4,128,1,float16,fp8,0,0.40859198570251465
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,24,1,128,1,float16,float16,0,0.21538132429122925
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,24,1,128,1,float16,fp8,0,0.21742933988571167
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,24,8,128,1,float16,float16,0,0.4092586835225423
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,24,8,128,1,float16,fp8,0,0.4106239875157674
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,24,24,128,1,float16,float16,0,0.2392746607462565
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,24,1,128,1,fp8,fp8,0,0.21366933981577554
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,24,24,128,1,float16,fp8,0,0.24302933613459268
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,24,24,128,1,fp8,fp8,0,0.23586134115854898
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,24,8,128,1,fp8,fp8,0,0.4010719855626424
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,24,2,128,1,float16,float16,0,0.22016000747680664
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,24,2,128,1,fp8,fp8,0,0.21571733554204306
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,24,2,128,1,float16,fp8,0,0.21880000829696655
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,24,4,128,1,float16,float16,0,0.2225546638170878
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,24,4,128,1,float16,fp8,0,0.22118399540583292
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,24,4,128,1,fp8,fp8,0,0.2177706758181254
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,24,1,128,1,float16,float16,0,0.12970133622487387
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,24,8,128,1,fp8,fp8,0,0.22323199113210043
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,24,8,128,1,float16,float16,0,0.22698666652043661
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,24,24,128,1,float16,float16,0,0.14079466462135315
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,24,24,128,1,float16,fp8,0,0.14335999886194864
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,24,1,128,1,float16,fp8,0,0.13106667002042136
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,24,1,128,1,fp8,fp8,0,0.1225386659304301
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,24,24,128,1,fp8,fp8,0,0.14131200313568115
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,24,8,128,1,float16,fp8,0,0.22732800245285034
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,24,2,128,1,fp8,fp8,0,0.12390399972597758
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,24,2,128,1,float16,float16,0,0.13055466612180075
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,24,2,128,1,float16,fp8,0,0.13038933277130127
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,24,4,128,1,float16,float16,0,0.1302186648050944
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,24,4,128,1,float16,fp8,0,0.13260799646377563
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,24,4,128,1,fp8,fp8,0,0.12458667159080505
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,24,8,128,1,float16,float16,0,0.1320693294207255
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,24,8,128,1,float16,fp8,0,0.1341386636098226
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,24,8,128,1,fp8,fp8,0,0.13038933277130127
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,24,1,128,1,float16,fp8,0,0.0890880028406779
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,24,1,128,1,float16,float16,0,0.0890880028406779
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,24,24,128,1,float16,float16,0,0.09215999643007915
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,24,24,128,1,float16,fp8,0,0.09284266829490662
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,24,24,128,1,fp8,fp8,0,0.08942932883898418
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,24,2,128,1,float16,float16,0,0.09010666608810425
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,24,1,128,1,fp8,fp8,0,0.086709330479304
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,24,2,128,1,fp8,fp8,0,0.08429867029190063
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,24,2,128,1,float16,fp8,0,0.09045867125193278
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,24,4,128,1,float16,float16,0,0.08909866213798523
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,24,4,128,1,float16,fp8,0,0.09079466263453166
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,24,8,128,1,float16,float16,0,0.09011733531951904
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,24,8,128,1,float16,fp8,0,0.09079999725023906
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,24,4,128,1,fp8,fp8,0,0.0846613347530365
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,24,8,128,1,fp8,fp8,0,0.08601599931716919
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,24,1,128,1,float16,float16,0,2.0220640500386557
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,24,1,128,1,float16,fp8,0,2.0319573084513345
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,24,1,128,1,fp8,fp8,0,2.0060106913248696
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,24,2,128,1,fp8,fp8,0,2.024085362752279
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,24,2,128,1,float16,float16,0,2.060976028442383
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,24,2,128,1,float16,fp8,0,2.0418666203816733
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,24,4,128,1,float16,float16,0,2.0654187202453613
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,24,4,128,1,float16,fp8,0,2.0654187202453613
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,24,1,128,1,float16,float16,0,1.018719991048177
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,24,4,128,1,fp8,fp8,0,2.0486720403035483
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,24,1,128,1,float16,fp8,0,1.0173439979553223
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,24,8,128,1,float16,float16,0,2.088618596394857
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,24,8,128,1,float16,fp8,0,2.0848639806111655
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,24,24,128,1,float16,fp8,0,1.1306506792704265
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,24,24,128,1,float16,float16,0,1.1316906611124675
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,24,8,128,1,fp8,fp8,0,2.0848639806111655
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,24,24,128,1,fp8,fp8,0,1.1357866923014324
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,24,1,128,1,fp8,fp8,0,1.0166666507720947
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,24,2,128,1,float16,float16,0,1.0231413046518962
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,24,2,128,1,float16,fp8,0,1.0275786717732747
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,24,2,128,1,fp8,fp8,0,1.02348796526591
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,24,4,128,1,float16,float16,0,1.0330453713734944
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,24,4,128,1,float16,fp8,0,1.046015977859497
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,24,4,128,1,fp8,fp8,0,1.034069299697876
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,24,8,128,1,float16,float16,0,1.0468693574269612
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,24,8,128,1,float16,fp8,0,1.0535253683725994
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,24,1,128,1,float16,float16,0,0.5338453451792399
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,24,1,128,1,float16,fp8,0,0.5304426749547323
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,24,1,128,1,fp8,fp8,0,0.525978684425354
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,24,24,128,1,float16,float16,0,0.5785599946975708
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,24,8,128,1,fp8,fp8,0,1.0521600246429443
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,24,2,128,1,float16,float16,0,0.5304319858551025
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,24,24,128,1,float16,fp8,0,0.5847040017445883
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,24,2,128,1,float16,fp8,0,0.53657599290212
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,24,2,128,1,fp8,fp8,0,0.5314559936523438
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,24,24,128,1,fp8,fp8,0,0.5894666512807211
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,24,4,128,1,float16,float16,0,0.535210649172465
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,24,4,128,1,float16,fp8,0,0.5403253237406412
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,24,4,128,1,fp8,fp8,0,0.534874677658081
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,24,8,128,1,float16,float16,0,0.5437386830647787
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,24,24,128,1,float16,float16,0,0.3090826670328776
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,24,8,128,1,float16,fp8,0,0.5468159914016724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,24,8,128,1,fp8,fp8,0,0.5478399991989136
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,24,24,128,1,float16,fp8,0,0.31522132953008014
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,24,1,128,1,float16,float16,0,0.2824479937553406
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,24,1,128,1,float16,fp8,0,0.28171199560165405
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,24,1,128,1,fp8,fp8,0,0.2817759911219279
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,24,2,128,1,float16,float16,0,0.2845013340314229
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,24,2,128,1,float16,fp8,0,0.2845013340314229
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,24,2,128,1,fp8,fp8,0,0.28722665707270306
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,24,24,128,1,fp8,fp8,0,0.3128319978713989
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,24,4,128,1,float16,float16,0,0.288592000802358
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,24,4,128,1,float16,fp8,0,0.2892799973487854
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,24,4,128,1,fp8,fp8,0,0.28826133410135907
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,24,1,128,1,float16,float16,0,0.15667200088500977
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,24,8,128,1,float16,float16,0,0.29100267092386883
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,24,8,128,1,float16,fp8,0,0.2950773239135742
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,24,1,128,1,float16,fp8,0,0.1570080022017161
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,24,24,128,1,float16,float16,0,0.17732266585032144
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,24,8,128,1,fp8,fp8,0,0.29337600866953534
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,24,24,128,1,float16,fp8,0,0.17783466974894205
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,24,24,128,1,fp8,fp8,0,0.1771519978841146
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,24,1,128,1,fp8,fp8,0,0.1532586713631948
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,24,2,128,1,float16,float16,0,0.1570133368174235
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,24,2,128,1,float16,fp8,0,0.15683733423550925
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,24,2,128,1,fp8,fp8,0,0.15530666708946228
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,24,4,128,1,float16,float16,0,0.16025599837303162
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,24,4,128,1,fp8,fp8,0,0.1621333360671997
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,24,4,128,1,float16,fp8,0,0.15940800309181213
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,24,8,128,1,float16,float16,0,0.16485333442687988
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,24,8,128,1,float16,fp8,0,0.16435199975967407
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,24,24,128,1,float16,float16,0,0.10683733224868774
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,24,8,128,1,fp8,fp8,0,0.16622933745384216
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,24,1,128,1,float16,float16,0,0.09898666540781657
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,24,24,128,1,float16,fp8,0,0.105813334385554
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,24,1,128,1,float16,fp8,0,0.09828266501426697
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,24,1,128,1,fp8,fp8,0,0.09523199995358785
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,24,2,128,1,float16,float16,0,0.09966933727264404
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,24,24,128,1,fp8,fp8,0,0.10752000411351521
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,24,2,128,1,float16,fp8,0,0.10035199920336406
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,24,2,128,1,fp8,fp8,0,0.09523733456929524
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,24,4,128,1,float16,float16,0,0.10000532865524292
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,24,4,128,1,float16,fp8,0,0.10001066327095032
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,24,4,128,1,fp8,fp8,0,0.09591466188430786
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,24,8,128,1,float16,float16,0,0.0993280013402303
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,24,8,128,1,float16,fp8,0,0.10105599959691365
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,24,24,128,1,float16,float16,0,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,24,24,128,1,float16,fp8,0,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,24,1,128,1,fp8,fp8,0,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,24,1,128,1,float16,fp8,0,0.06555200119813283
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,24,1,128,1,float16,float16,0,0.06485333542029063
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,24,2,128,1,float16,fp8,0,0.06485333542029063
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,24,8,128,1,fp8,fp8,0,0.09727999567985535
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,24,24,128,1,fp8,fp8,0,0.06451199948787689
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,24,2,128,1,fp8,fp8,0,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,24,2,128,1,float16,float16,0,0.06417599817117055
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,24,4,128,1,float16,float16,0,0.06519466638565063
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,24,4,128,1,float16,fp8,0,0.06587199866771698
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,24,4,128,1,fp8,fp8,0,0.062458669145902
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,24,8,128,1,float16,float16,0,0.06485333542029063
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,24,8,128,1,fp8,fp8,0,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,24,8,128,1,float16,fp8,0,0.06586666901906331
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,24,1,128,1,float16,float16,0,2.2230985959370932
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,24,1,128,1,float16,fp8,0,2.232837359110514
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,24,1,128,1,fp8,fp8,0,2.3350559870402017
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,24,2,128,1,fp8,fp8,0,2.473482608795166
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,24,2,128,1,float16,fp8,0,2.288474718729655
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,24,2,128,1,float16,float16,0,2.2809599240620932
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,24,4,128,1,float16,float16,0,2.314746697743734
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,24,4,128,1,float16,fp8,0,2.340181350708008
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,24,1,128,1,float16,float16,0,1.1368052959442139
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,24,4,128,1,fp8,fp8,0,2.4971946080525718
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,24,24,128,1,float16,float16,0,1.249626636505127
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,24,24,128,1,float16,fp8,0,1.228117307027181
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,24,1,128,1,float16,fp8,0,1.1354453563690186
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,24,8,128,1,float16,float16,0,2.3864320119222007
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,24,8,128,1,float16,fp8,0,2.3666346867879233
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,24,24,128,1,fp8,fp8,0,1.3158453305562336
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,24,8,128,1,fp8,fp8,0,2.513754685719808
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,24,1,128,1,fp8,fp8,0,1.1757280031840007
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,24,2,128,1,float16,float16,0,1.1354506810506184
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,24,2,128,1,float16,fp8,0,1.1364479859670003
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,24,2,128,1,fp8,fp8,0,1.2267519632975261
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,24,4,128,1,float16,float16,0,1.1487627029418945
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,24,4,128,1,float16,fp8,0,1.1490986347198486
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,24,4,128,1,fp8,fp8,0,1.2120746771494548
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,24,8,128,1,float16,float16,0,1.1801599661509197
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,24,8,128,1,float16,fp8,0,1.163434664408366
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,24,1,128,1,float16,float16,0,0.5727519989013672
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,24,1,128,1,float16,fp8,0,0.5703680117925009
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,24,1,128,1,fp8,fp8,0,0.6027679840723673
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,24,24,128,1,float16,float16,0,0.6367573340733846
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,24,2,128,1,float16,float16,0,0.5789013306299845
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,24,8,128,1,fp8,fp8,0,1.260858694712321
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,24,2,128,1,float16,fp8,0,0.5809493462244669
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,24,24,128,1,float16,fp8,0,0.6306133270263672
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,24,2,128,1,fp8,fp8,0,0.6099626620610555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,24,24,128,1,fp8,fp8,0,0.6669706503550211
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,24,4,128,1,float16,float16,0,0.5847093264261881
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,24,4,128,1,float16,fp8,0,0.5816320180892944
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,24,4,128,1,fp8,fp8,0,0.6150879859924316
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,24,24,128,1,float16,float16,0,0.33159999052683514
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,24,8,128,1,float16,float16,0,0.5952800114949545
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,24,8,128,1,fp8,fp8,0,0.6261759996414185
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,24,8,128,1,float16,fp8,0,0.5942613283793131
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,24,24,128,1,float16,fp8,0,0.32681600252787274
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,24,1,128,1,float16,float16,0,0.297487994035085
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,24,1,128,1,float16,fp8,0,0.2974720001220703
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,24,24,128,1,fp8,fp8,0,0.3449173370997111
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,24,1,128,1,fp8,fp8,0,0.3087306618690491
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,24,2,128,1,float16,float16,0,0.3012266755104065
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,24,2,128,1,fp8,fp8,0,0.3158986568450928
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,24,2,128,1,float16,fp8,0,0.30293333530426025
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,24,4,128,1,float16,float16,0,0.30350399017333984
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,24,4,128,1,float16,fp8,0,0.30293333530426025
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,24,4,128,1,fp8,fp8,0,0.31828800837198895
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,24,1,128,1,float16,float16,0,0.16059733430544534
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,24,1,128,1,float16,fp8,0,0.1604320009549459
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,24,8,128,1,float16,float16,0,0.31010133028030396
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,24,8,128,1,float16,fp8,0,0.307370662689209
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,24,8,128,1,fp8,fp8,0,0.32546667257944745
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,24,24,128,1,float16,float16,0,0.18039466937383017
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,24,24,128,1,float16,fp8,0,0.176639993985494
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,24,24,128,1,fp8,fp8,0,0.1853440006573995
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,24,1,128,1,fp8,fp8,0,0.1646933356920878
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,24,2,128,1,float16,float16,0,0.1616159975528717
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,24,2,128,1,float16,fp8,0,0.16127999623616537
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,24,2,128,1,fp8,fp8,0,0.168122669061025
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,24,4,128,1,float16,float16,0,0.16435199975967407
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,24,4,128,1,float16,fp8,0,0.16249600052833557
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,24,4,128,1,fp8,fp8,0,0.17149867614110312
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,24,8,128,1,float16,float16,0,0.16639999548594156
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,24,8,128,1,fp8,fp8,0,0.17407999436060587
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,24,8,128,1,float16,fp8,0,0.1660373310248057
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,24,24,128,1,float16,float16,0,0.10308266679445903
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,24,1,128,1,float16,float16,0,0.09181867043177287
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,24,1,128,1,fp8,fp8,0,0.09081600109736125
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,24,1,128,1,float16,fp8,0,0.09352533022562663
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,24,24,128,1,float16,fp8,0,0.1013706624507904
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,24,2,128,1,float16,float16,0,0.09180800120035808
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,24,2,128,1,float16,fp8,0,0.09250133236249287
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,24,24,128,1,fp8,fp8,0,0.10717866818110149
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,24,2,128,1,fp8,fp8,0,0.09353066484133403
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,24,4,128,1,float16,fp8,0,0.0918239951133728
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,24,4,128,1,float16,float16,0,0.09116799632708232
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,24,4,128,1,fp8,fp8,0,0.09284800291061401
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,24,8,128,1,float16,fp8,0,0.09250133236249287
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,24,8,128,1,float16,float16,0,0.0942133367061615
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,24,24,128,1,float16,float16,0,0.05973866581916809
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,24,8,128,1,fp8,fp8,0,0.09588799873987834
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,24,24,128,1,float16,fp8,0,0.061103999614715576
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,24,1,128,1,fp8,fp8,0,0.05733866492907206
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,24,1,128,1,float16,float16,0,0.05734399954477946
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,24,1,128,1,float16,fp8,0,0.05905066430568695
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,24,24,128,1,fp8,fp8,0,0.05973866581916809
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,24,2,128,1,float16,fp8,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,24,2,128,1,float16,float16,0,0.05801600217819214
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,24,2,128,1,fp8,fp8,0,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,24,4,128,1,float16,float16,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,24,4,128,1,float16,fp8,0,0.0580213318268458
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,24,4,128,1,fp8,fp8,0,0.058042665322621666
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,24,8,128,1,float16,fp8,0,0.059061333537101746
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,24,8,128,1,fp8,fp8,0,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,24,8,128,1,float16,float16,0,0.05905066430568695
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,24,24,128,1,fp8,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,24,24,128,1,float16,float16,0,0.04167466859022776
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,24,24,128,1,float16,fp8,0,0.041989331444104515
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,24,1,128,1,float16,float16,0,0.040965333580970764
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,24,1,128,1,fp8,fp8,0,0.03788800040880839
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,24,2,128,1,fp8,fp8,0,0.038917332887649536
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,24,1,128,1,float16,fp8,0,0.03961066653331121
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,24,2,128,1,float16,float16,0,0.04027199993530909
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,24,4,128,1,fp8,fp8,0,0.03924266745646795
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,24,4,128,1,float16,float16,0,0.03958933303753535
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,24,2,128,1,float16,fp8,0,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,24,4,128,1,float16,fp8,0,0.039936001102129616
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,24,8,128,1,float16,fp8,0,0.041296000281969704
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,24,8,128,1,float16,float16,0,0.03925866633653641
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,24,8,128,1,fp8,fp8,0,0.03754133234421412
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,24,1,128,1,float16,fp8,0,1.9447466532389324
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,24,1,128,1,float16,float16,0,1.946789264678955
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,24,1,128,1,fp8,fp8,0,2.080085277557373
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,24,2,128,1,float16,float16,0,2.035029411315918
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,24,2,128,1,float16,fp8,0,2.0090880393981934
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,24,2,128,1,fp8,fp8,0,2.219007968902588
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,24,4,128,1,float16,float16,0,2.036736011505127
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,24,4,128,1,float16,fp8,0,2.044586658477783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,24,1,128,1,float16,float16,0,0.9849173227945963
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,24,4,128,1,fp8,fp8,0,2.2530080477396646
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,24,1,128,1,float16,fp8,0,0.9859360059102377
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,24,24,128,1,float16,float16,0,1.1132533550262451
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,24,8,128,1,float16,float16,0,2.0971466700236
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,24,8,128,1,float16,fp8,0,2.0807679494222007
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,24,24,128,1,float16,fp8,0,1.087994654973348
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,24,24,128,1,fp8,fp8,0,1.192618687947591
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,24,8,128,1,fp8,fp8,0,2.2621866861979165
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,24,1,128,1,fp8,fp8,0,1.0525226593017578
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,24,2,128,1,float16,float16,0,1.0016427040100098
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,24,2,128,1,fp8,fp8,0,1.1033813158671062
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,24,2,128,1,float16,fp8,0,0.9985706806182861
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,24,4,128,1,float16,float16,0,1.0064213275909424
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,24,4,128,1,float16,fp8,0,1.00437331199646
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,24,4,128,1,fp8,fp8,0,1.0876586437225342
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,24,8,128,1,float16,float16,0,1.0367999871571858
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,24,8,128,1,float16,fp8,0,1.0238347053527832
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,24,1,128,1,float16,float16,0,0.5029439926147461
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,24,1,128,1,float16,fp8,0,0.5032960176467896
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,24,8,128,1,fp8,fp8,0,1.13372802734375
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,24,1,128,1,fp8,fp8,0,0.5376160144805908
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,24,2,128,1,float16,float16,0,0.5104639927546183
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,24,24,128,1,float16,float16,0,0.5649013519287109
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,24,24,128,1,float16,fp8,0,0.5546666781107584
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,24,2,128,1,float16,fp8,0,0.5073599815368652
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,24,24,128,1,fp8,fp8,0,0.601797342300415
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,24,2,128,1,fp8,fp8,0,0.5502453247706095
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,24,4,128,1,float16,float16,0,0.5131946802139282
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,24,4,128,1,float16,fp8,0,0.514901320139567
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,24,4,128,1,fp8,fp8,0,0.5536213318506876
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,24,8,128,1,float16,float16,0,0.5253066619237264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,24,8,128,1,float16,fp8,0,0.5179733435312907
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,24,8,128,1,fp8,fp8,0,0.5611519813537598
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,24,24,128,1,float16,float16,0,0.29474133253097534
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,24,1,128,1,float16,float16,0,0.26368000109990436
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,24,24,128,1,float16,fp8,0,0.2882506648699443
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,24,1,128,1,float16,fp8,0,0.2604373296101888
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,24,1,128,1,fp8,fp8,0,0.2773333390553792
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,24,2,128,1,float16,fp8,0,0.26470400889714557
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,24,2,128,1,float16,float16,0,0.26470400889714557
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,24,2,128,1,fp8,fp8,0,0.28245333830515545
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,24,24,128,1,fp8,fp8,0,0.3141973416010539
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,24,4,128,1,float16,fp8,0,0.26505066951115924
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,24,4,128,1,float16,float16,0,0.2688000003496806
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,24,4,128,1,fp8,fp8,0,0.28621333837509155
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,24,1,128,1,float16,float16,0,0.14079466462135315
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,24,1,128,1,float16,fp8,0,0.13944000005722046
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,24,8,128,1,float16,float16,0,0.2725546757380168
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,24,8,128,1,float16,fp8,0,0.272213339805603
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,24,24,128,1,float16,float16,0,0.15854400396347046
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,24,8,128,1,fp8,fp8,0,0.29098133246103924
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,24,24,128,1,float16,fp8,0,0.15598932902018228
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,24,24,128,1,fp8,fp8,0,0.16811732451121011
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,24,1,128,1,fp8,fp8,0,0.14762133359909058
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,24,2,128,1,float16,float16,0,0.14114133516947427
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,24,2,128,1,fp8,fp8,0,0.15052800377209982
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,24,2,128,1,float16,fp8,0,0.14063466588656107
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,24,4,128,1,float16,float16,0,0.14387200276056925
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,24,4,128,1,float16,fp8,0,0.14387200276056925
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,24,4,128,1,fp8,fp8,0,0.15172266960144043
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,24,8,128,1,float16,float16,0,0.1474560002485911
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,24,8,128,1,fp8,fp8,0,0.15668267011642456
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,24,1,128,1,float16,float16,0,0.08089066545168559
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,24,8,128,1,float16,fp8,0,0.1460586686929067
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,24,1,128,1,float16,fp8,0,0.08053866525491078
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,24,24,128,1,float16,fp8,0,0.08806399504343669
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,24,24,128,1,float16,float16,0,0.0918239951133728
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,24,1,128,1,fp8,fp8,0,0.08295466502507527
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,24,2,128,1,float16,float16,0,0.08055999875068665
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,24,24,128,1,fp8,fp8,0,0.09796266754468282
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,24,2,128,1,float16,fp8,0,0.08090133468310039
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,24,4,128,1,float16,float16,0,0.08123733103275299
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,24,4,128,1,float16,fp8,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,24,4,128,1,fp8,fp8,0,0.08227733274300893
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,24,8,128,1,float16,float16,0,0.08260266482830048
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,24,8,128,1,float16,fp8,0,0.08260266482830048
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,24,8,128,1,fp8,fp8,0,0.08703999718030293
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,24,2,128,1,fp8,fp8,0,0.08260266482830048
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,24,1,128,1,float16,float16,0,0.05052266518274943
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,24,24,128,1,float16,float16,0,0.05495466788609823
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,24,1,128,1,fp8,fp8,0,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,24,24,128,1,float16,fp8,0,0.054272000988324486
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,24,2,128,1,float16,float16,0,0.05154666801293691
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,24,24,128,1,fp8,fp8,0,0.05359466870625814
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,24,2,128,1,fp8,fp8,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,24,1,128,1,float16,fp8,0,0.05156266689300537
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,24,4,128,1,float16,float16,0,0.05119466781616211
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,24,4,128,1,float16,fp8,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,24,8,128,1,float16,float16,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,24,2,128,1,float16,fp8,0,0.05153066913286845
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,24,8,128,1,float16,fp8,0,0.050517335534095764
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,24,8,128,1,fp8,fp8,0,0.0532533327738444
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,24,1,128,1,float16,float16,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,24,24,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,24,1,128,1,fp8,fp8,0,0.034143999218940735
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,24,24,128,1,float16,fp8,0,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,24,4,128,1,fp8,fp8,0,0.052229334910710655
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,24,2,128,1,float16,float16,0,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,24,2,128,1,float16,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,24,24,128,1,float16,float16,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,24,2,128,1,fp8,fp8,0,0.03513599932193756
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,24,4,128,1,float16,float16,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,24,1,128,1,float16,fp8,0,0.034815999368826546
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,24,4,128,1,float16,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,24,8,128,1,float16,float16,0,0.03379199902216593
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,24,4,128,1,fp8,fp8,0,0.03242666771014532
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,24,8,128,1,float16,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,24,1,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,24,24,128,1,float16,float16,0,0.031093334158261616
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,24,24,128,1,float16,fp8,0,0.029359998802344005
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,24,1,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,24,8,128,1,fp8,fp8,0,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,24,2,128,1,float16,fp8,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,24,2,128,1,float16,float16,0,0.029701332251230877
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,24,2,128,1,fp8,fp8,0,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,24,4,128,1,float16,fp8,0,0.029002666473388672
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,24,4,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,24,1,128,1,fp8,fp8,0,0.02697066714366277
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,24,24,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,24,8,128,1,float16,float16,0,0.030373332401116688
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,24,8,128,1,fp8,fp8,0,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,24,8,128,1,float16,fp8,0,0.03108799954255422
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,24,4,128,1,fp8,fp8,0,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,24,1,128,1,float16,float16,0,0.7587839762369791
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,24,1,128,1,float16,fp8,0,0.7526400089263916
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,24,1,128,1,fp8,fp8,0,0.7768479983011881
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,24,2,128,1,float16,fp8,0,0.7679999669392904
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,24,2,128,1,fp8,fp8,0,0.8289226690928141
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,24,2,128,1,float16,float16,0,0.7744853496551514
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,24,4,128,1,float16,float16,0,0.7802879810333252
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,24,4,128,1,float16,fp8,0,0.7741493384043375
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,24,1,128,1,float16,float16,0,0.39029332002003986
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,24,4,128,1,fp8,fp8,0,0.8366133371988932
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,24,24,128,1,float16,float16,0,0.45448533693949383
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,24,8,128,1,float16,float16,0,0.8101653258005778
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,24,8,128,1,float16,fp8,0,0.8053812980651855
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,24,8,128,1,fp8,fp8,0,0.8601600329081217
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,24,24,128,1,float16,fp8,0,0.4466346502304077
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,24,24,128,1,fp8,fp8,0,0.46882132689158124
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,24,1,128,1,fp8,fp8,0,0.4020853439966838
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,24,1,128,1,float16,fp8,0,0.38758401075998944
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,24,2,128,1,float16,float16,0,0.3973120053609212
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,24,2,128,1,float16,fp8,0,0.3942399819691976
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,24,2,128,1,fp8,fp8,0,0.4113226731618245
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,24,4,128,1,float16,float16,0,0.4020906686782837
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,24,4,128,1,fp8,fp8,0,0.4174559911092122
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,24,4,128,1,float16,fp8,0,0.3986826737721761
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,24,8,128,1,float16,float16,0,0.4150613149007161
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,24,8,128,1,float16,fp8,0,0.4065279960632324
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,24,1,128,1,float16,float16,0,0.20886399348576865
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,24,1,128,1,float16,fp8,0,0.20582934220631918
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,24,8,128,1,fp8,fp8,0,0.4254666566848755
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,24,1,128,1,fp8,fp8,0,0.20872533321380615
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,24,24,128,1,float16,float16,0,0.24064532915751138
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,24,24,128,1,float16,fp8,0,0.23825599749883017
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,24,24,128,1,fp8,fp8,0,0.2474666635195414
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,24,2,128,1,float16,float16,0,0.20787199338277182
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,24,2,128,1,float16,fp8,0,0.21094399690628052
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,24,2,128,1,fp8,fp8,0,0.21504000822703043
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,24,4,128,1,float16,float16,0,0.21334399779637656
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,24,4,128,1,float16,fp8,0,0.20992000897725424
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,24,4,128,1,fp8,fp8,0,0.2201546629269918
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,24,8,128,1,float16,float16,0,0.21845332781473795
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,24,8,128,1,float16,fp8,0,0.2177706758181254
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,24,24,128,1,float16,float16,0,0.13619200388590494
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,24,8,128,1,fp8,fp8,0,0.22393065690994263
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,24,1,128,1,float16,float16,0,0.1129866639773051
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,24,1,128,1,float16,fp8,0,0.1129813293615977
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,24,1,128,1,fp8,fp8,0,0.11503466963768005
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,24,24,128,1,float16,fp8,0,0.13193066914876303
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,24,2,128,1,float16,float16,0,0.11638933420181274
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,24,2,128,1,float16,fp8,0,0.11571733156840007
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,24,2,128,1,fp8,fp8,0,0.11878933509190877
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,24,24,128,1,fp8,fp8,0,0.13516799608866373
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,24,4,128,1,float16,float16,0,0.11639466881752014
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,24,4,128,1,float16,fp8,0,0.1160586675008138
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,24,4,128,1,fp8,fp8,0,0.11877866586049397
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,24,8,128,1,float16,fp8,0,0.12083199620246887
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,24,24,128,1,float16,float16,0,0.07611200213432312
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,24,1,128,1,float16,float16,0,0.06586133440335591
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,24,8,128,1,fp8,fp8,0,0.12288000186284383
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,24,24,128,1,float16,fp8,0,0.07509333391984303
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,24,1,128,1,float16,fp8,0,0.06689600149790446
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,24,24,128,1,fp8,fp8,0,0.07987200220425923
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,24,8,128,1,float16,float16,0,0.11980266372362773
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,24,1,128,1,fp8,fp8,0,0.06519466638565063
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,24,2,128,1,float16,fp8,0,0.06655466556549072
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,24,2,128,1,float16,float16,0,0.06656533479690552
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,24,2,128,1,fp8,fp8,0,0.06519466638565063
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,24,4,128,1,float16,fp8,0,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,24,4,128,1,float16,float16,0,0.06621866424878438
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,24,4,128,1,fp8,fp8,0,0.06655466556549072
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,24,1,128,1,float16,fp8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,24,1,128,1,float16,float16,0,0.04369066655635834
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,24,8,128,1,float16,fp8,0,0.067221333583196
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,24,8,128,1,float16,float16,0,0.06758933266003926
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,24,24,128,1,float16,float16,0,0.047450666626294456
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,24,24,128,1,fp8,fp8,0,0.04746133089065552
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,24,8,128,1,fp8,fp8,0,0.07032000025113423
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,24,24,128,1,float16,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,24,1,128,1,fp8,fp8,0,0.04266666869322459
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,24,2,128,1,float16,float16,0,0.04438399771849314
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,24,4,128,1,float16,fp8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,24,2,128,1,float16,fp8,0,0.04507199923197428
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,24,4,128,1,float16,float16,0,0.04369066655635834
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,24,4,128,1,fp8,fp8,0,0.04369066655635834
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,24,8,128,1,float16,float16,0,0.04607999821503957
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,24,8,128,1,float16,fp8,0,0.04609066744645437
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,24,8,128,1,fp8,fp8,0,0.04437333345413208
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,24,2,128,1,fp8,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,24,24,128,1,float16,float16,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,24,1,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,24,1,128,1,float16,fp8,0,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,24,1,128,1,fp8,fp8,0,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,24,24,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,24,24,128,1,fp8,fp8,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,24,2,128,1,float16,float16,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,24,4,128,1,float16,float16,0,0.03107733279466629
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,24,4,128,1,fp8,fp8,0,0.029696000119050343
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,24,2,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,24,4,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,24,8,128,1,float16,float16,0,0.031744000812371574
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,24,8,128,1,float16,fp8,0,0.030037333567937214
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,24,8,128,1,fp8,fp8,0,0.030048000315825146
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,24,24,128,1,float16,float16,0,0.0262773334980011
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,24,1,128,1,float16,float16,0,0.025941332181294758
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,24,24,128,1,float16,fp8,0,0.02697066714366277
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,24,2,128,1,float16,fp8,0,0.029370665550231934
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,24,24,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,24,1,128,1,fp8,fp8,0,0.023200000325838726
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,24,2,128,1,float16,float16,0,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,24,2,128,1,fp8,fp8,0,0.024234667420387268
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,24,2,128,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,24,4,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,24,4,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,24,1,128,1,float16,fp8,0,0.026975999275843304
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,24,8,128,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,24,24,128,1,float16,fp8,0,0.022890667120615642
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,24,24,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,24,24,128,1,float16,float16,0,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,24,8,128,1,fp8,fp8,0,0.023898666103680927
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,24,4,128,1,float16,fp8,0,0.025594666600227356
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,24,8,128,1,float16,float16,0,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,24,1,128,1,fp8,fp8,0,0.02080533280968666
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,24,1,128,1,float16,fp8,0,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,24,2,128,1,float16,float16,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,24,2,128,1,float16,fp8,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,24,2,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,24,4,128,1,fp8,fp8,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,24,8,128,1,float16,float16,0,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,24,8,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,24,1,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,24,8,128,1,fp8,fp8,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,24,4,128,1,float16,fp8,0,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,24,4,128,1,float16,float16,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,24,1,128,1,float16,float16,0,0.40994131565093994
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,24,1,128,1,float16,fp8,0,0.40854934851328534
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,24,1,128,1,fp8,fp8,0,0.41812801361083984
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,24,2,128,1,float16,float16,0,0.41710933049519855
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,24,2,128,1,float16,fp8,0,0.4157173236211141
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,24,2,128,1,fp8,fp8,0,0.429909348487854
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,24,4,128,1,float16,float16,0,0.42308799425760907
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,24,4,128,1,float16,fp8,0,0.4210346539815267
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,24,4,128,1,fp8,fp8,0,0.43638400236765545
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,24,1,128,1,float16,float16,0,0.21538132429122925
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,24,24,128,1,float16,float16,0,0.24916799863179526
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,24,8,128,1,float16,float16,0,0.4346880118052165
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,24,8,128,1,float16,fp8,0,0.43092799186706543
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,24,24,128,1,float16,fp8,0,0.2426933248837789
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,24,8,128,1,fp8,fp8,0,0.45448533693949383
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,24,24,128,1,fp8,fp8,0,0.2553173303604126
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,24,1,128,1,float16,fp8,0,0.2133493423461914
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,24,2,128,1,float16,float16,0,0.21675733725229898
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,24,2,128,1,float16,fp8,0,0.21708800395329794
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,24,1,128,1,fp8,fp8,0,0.21808000405629477
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,24,2,128,1,fp8,fp8,0,0.2211893399556478
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,24,4,128,1,float16,float16,0,0.22016000747680664
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,24,4,128,1,float16,fp8,0,0.22016533215840658
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,24,4,128,1,fp8,fp8,0,0.2273226579030355
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,24,8,128,1,float16,float16,0,0.2249386707941691
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,24,1,128,1,float16,float16,0,0.11741866668065389
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,24,1,128,1,float16,fp8,0,0.11710932850837708
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,24,24,128,1,float16,fp8,0,0.1327839990456899
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,24,8,128,1,fp8,fp8,0,0.23550933599472046
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,24,8,128,1,float16,fp8,0,0.22390933831532797
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,24,1,128,1,fp8,fp8,0,0.11741866668065389
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,24,24,128,1,fp8,fp8,0,0.1389173368612925
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,24,24,128,1,float16,float16,0,0.13380266229311624
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,24,2,128,1,float16,float16,0,0.11775466799736023
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,24,2,128,1,float16,fp8,0,0.11773866415023804
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,24,2,128,1,fp8,fp8,0,0.12049600481987
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,24,4,128,1,float16,float16,0,0.11776000261306763
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,24,4,128,1,float16,fp8,0,0.11912533640861511
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,24,4,128,1,fp8,fp8,0,0.12356266379356384
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,24,8,128,1,float16,float16,0,0.12288000186284383
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,24,8,128,1,fp8,fp8,0,0.12595733006795248
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,24,1,128,1,float16,float16,0,0.06725333134333293
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,24,8,128,1,float16,fp8,0,0.12117333213488261
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,24,1,128,1,float16,fp8,0,0.068271999557813
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,24,24,128,1,float16,float16,0,0.07850133379300435
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,24,1,128,1,fp8,fp8,0,0.06656000018119812
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,24,24,128,1,fp8,fp8,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,24,24,128,1,float16,fp8,0,0.07815999786059062
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,24,2,128,1,float16,float16,0,0.06690133114655812
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,24,2,128,1,float16,fp8,0,0.06723733246326447
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,24,4,128,1,float16,float16,0,0.0682666649421056
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,24,4,128,1,float16,fp8,0,0.067930668592453
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,24,4,128,1,fp8,fp8,0,0.06589866677920024
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,24,8,128,1,float16,float16,0,0.06963199873765309
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,24,8,128,1,fp8,fp8,0,0.06999999781449635
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,24,8,128,1,float16,fp8,0,0.07000533243020375
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,24,2,128,1,fp8,fp8,0,0.06689066688219707
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,24,24,128,1,float16,float16,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,24,1,128,1,float16,fp8,0,0.041984001795450844
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,24,1,128,1,float16,float16,0,0.041296000281969704
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,24,1,128,1,fp8,fp8,0,0.041637333730856575
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,24,2,128,1,float16,float16,0,0.043354665239652
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,24,24,128,1,float16,fp8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,24,2,128,1,fp8,fp8,0,0.04232533276081085
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,24,24,128,1,fp8,fp8,0,0.04401599864164988
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,24,4,128,1,float16,float16,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,24,4,128,1,fp8,fp8,0,0.04230933388074239
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,24,2,128,1,float16,fp8,0,0.04332800209522247
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,24,4,128,1,float16,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,24,8,128,1,float16,float16,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,24,8,128,1,fp8,fp8,0,0.043354665239652
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,24,8,128,1,float16,fp8,0,0.04164266586303711
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,24,24,128,1,float16,fp8,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,24,24,128,1,float16,float16,0,0.031397332747777305
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,24,1,128,1,fp8,fp8,0,0.029359998802344005
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,24,24,128,1,fp8,fp8,0,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,24,2,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,24,1,128,1,float16,fp8,0,0.03071466585000356
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,24,2,128,1,fp8,fp8,0,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,24,2,128,1,float16,fp8,0,0.030037333567937214
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,24,4,128,1,float16,float16,0,0.030720000465710957
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,24,4,128,1,float16,fp8,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,24,1,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,24,8,128,1,float16,float16,0,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,24,4,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,24,8,128,1,fp8,fp8,0,0.029690665503342945
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,24,8,128,1,float16,fp8,0,0.031397332747777305
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,24,24,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,24,24,128,1,float16,fp8,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,24,24,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,24,1,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,24,2,128,1,float16,float16,0,0.021157334248224895
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,24,1,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,24,2,128,1,float16,fp8,0,0.02184533327817917
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,24,2,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,24,4,128,1,float16,fp8,0,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,24,8,128,1,float16,float16,0,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,24,4,128,1,float16,float16,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,24,4,128,1,fp8,fp8,0,0.02083733429511388
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,24,1,128,1,float16,float16,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,24,8,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,24,8,128,1,fp8,fp8,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,24,24,128,1,float16,float16,0,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,24,24,128,1,fp8,fp8,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,24,24,128,1,float16,fp8,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,24,1,128,1,float16,float16,0,0.018789333601792652
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,24,1,128,1,float16,fp8,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,24,2,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,24,2,128,1,float16,fp8,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,24,2,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,24,4,128,1,float16,float16,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,24,4,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,24,8,128,1,float16,float16,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,24,4,128,1,fp8,fp8,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,24,8,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,24,8,128,1,fp8,fp8,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,24,24,128,1,float16,float16,0,0.018090666582187016
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,24,24,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,24,24,128,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,24,1,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,24,1,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,24,2,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,24,2,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,24,2,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,24,1,128,1,fp8,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,24,1,128,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,24,4,128,1,float16,float16,0,0.01740266631046931
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,24,8,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,24,8,128,1,float16,fp8,0,0.01876266673207283
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,24,4,128,1,fp8,fp8,0,0.01876266673207283
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,24,8,128,1,fp8,fp8,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,24,4,128,1,float16,fp8,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,24,1,128,1,float16,float16,0,0.27323732773462933
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,24,1,128,1,float16,fp8,0,0.27289066712061566
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,24,1,128,1,fp8,fp8,0,0.29474133253097534
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,24,2,128,1,float16,float16,0,0.2783626715342204
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,24,2,128,1,fp8,fp8,0,0.29917333523432416
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,24,2,128,1,float16,fp8,0,0.2763146758079529
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,24,4,128,1,float16,float16,0,0.27767467498779297
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,24,4,128,1,float16,fp8,0,0.28040534257888794
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,24,4,128,1,fp8,fp8,0,0.30532799164454144
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,24,8,128,1,float16,float16,0,0.2882560094197591
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,24,8,128,1,float16,fp8,0,0.2845013340314229
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,24,8,128,1,fp8,fp8,0,0.31249066193898517
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,24,24,128,1,float16,float16,0,0.1609386702378591
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,24,24,128,1,float16,fp8,0,0.1599146624406179
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,24,24,128,1,fp8,fp8,0,0.17681066195170084
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,24,1,128,1,float16,float16,0,0.14779733618100485
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,24,1,128,1,float16,fp8,0,0.14711999893188477
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,24,1,128,1,fp8,fp8,0,0.15581867098808289
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,24,2,128,1,float16,float16,0,0.14729066689809164
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,24,2,128,1,float16,fp8,0,0.14677332838376364
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,24,2,128,1,fp8,fp8,0,0.15922133127848306
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,24,4,128,1,float16,float16,0,0.15034666657447815
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,24,4,128,1,float16,fp8,0,0.15035733580589294
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,24,8,128,1,float16,float16,0,0.1508639951546987
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,24,4,128,1,fp8,fp8,0,0.16331733266512552
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,24,1,128,1,float16,float16,0,0.08055466910203297
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,24,1,128,1,float16,fp8,0,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,24,8,128,1,float16,fp8,0,0.1493333379427592
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,24,24,128,1,float16,float16,0,0.09129066268603007
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,24,8,128,1,fp8,fp8,0,0.16485866904258728
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,24,24,128,1,float16,fp8,0,0.09079466263453166
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,24,24,128,1,fp8,fp8,0,0.09930133819580078
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,24,1,128,1,fp8,fp8,0,0.08430932958920796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,24,2,128,1,float16,float16,0,0.08089066545168559
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,24,2,128,1,float16,fp8,0,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,24,2,128,1,fp8,fp8,0,0.08431466420491536
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,24,4,128,1,float16,float16,0,0.08226666847864787
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,24,4,128,1,float16,fp8,0,0.08260266482830048
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,24,4,128,1,fp8,fp8,0,0.08635200063387553
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,24,8,128,1,float16,float16,0,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,24,1,128,1,float16,float16,0,0.05017599960168203
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,24,1,128,1,float16,fp8,0,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,24,8,128,1,fp8,fp8,0,0.09011200070381165
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,24,24,128,1,float16,float16,0,0.05188799897829691
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,24,24,128,1,fp8,fp8,0,0.05426666637261709
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,24,1,128,1,fp8,fp8,0,0.05085866649945577
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,24,24,128,1,float16,fp8,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,24,8,128,1,float16,fp8,0,0.08259200056393941
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,24,2,128,1,float16,float16,0,0.05049600203831991
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,24,2,128,1,float16,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,24,2,128,1,fp8,fp8,0,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,24,4,128,1,float16,float16,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,24,4,128,1,float16,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,24,4,128,1,fp8,fp8,0,0.05154666801293691
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,24,8,128,1,float16,float16,0,0.04948799808820089
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,24,8,128,1,fp8,fp8,0,0.05188799897829691
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,24,24,128,1,float16,float16,0,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,24,1,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,24,1,128,1,float16,fp8,0,0.03345600018898646
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,24,24,128,1,fp8,fp8,0,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,24,24,128,1,float16,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,24,2,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,24,1,128,1,fp8,fp8,0,0.033813332517941795
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,24,8,128,1,float16,fp8,0,0.05017599960168203
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,24,2,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,24,2,128,1,float16,fp8,0,0.03311999887228012
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,24,4,128,1,float16,float16,0,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,24,4,128,1,float16,fp8,0,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,24,8,128,1,float16,float16,0,0.03379733363787333
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,24,4,128,1,fp8,fp8,0,0.034815999368826546
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,24,24,128,1,float16,float16,0,0.025285333395004272
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,24,8,128,1,float16,fp8,0,0.03482133398453394
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,24,24,128,1,fp8,fp8,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,24,24,128,1,float16,fp8,0,0.024933333198229473
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,24,1,128,1,float16,float16,0,0.023904000719388325
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,24,1,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,24,2,128,1,float16,float16,0,0.024906667570273083
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,24,1,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,24,2,128,1,float16,fp8,0,0.02420799930890401
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,24,2,128,1,fp8,fp8,0,0.023887999355793
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,24,8,128,1,fp8,fp8,0,0.03379199902216593
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,24,4,128,1,float16,float16,0,0.02489600082238515
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,24,4,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,24,4,128,1,fp8,fp8,0,0.024901332954565685
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,24,8,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,24,8,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,24,24,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,24,8,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,24,24,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,24,1,128,1,float16,float16,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,24,1,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,24,1,128,1,fp8,fp8,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,24,2,128,1,float16,float16,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,24,4,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,24,2,128,1,float16,fp8,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,24,4,128,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,24,2,128,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,24,8,128,1,float16,float16,0,0.017407999684413273
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,24,4,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,24,24,128,1,float16,fp8,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,24,8,128,1,float16,fp8,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,24,8,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,24,24,128,1,float16,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,24,1,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,24,1,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,24,24,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,24,24,128,1,fp8,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,24,2,128,1,float16,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,24,4,128,1,float16,float16,0,0.016704000532627106
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,24,2,128,1,float16,float16,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,24,4,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,24,2,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,24,4,128,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,24,8,128,1,float16,float16,0,0.017749333133300144
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,24,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,24,8,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,24,8,128,1,fp8,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,24,24,128,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,24,24,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,24,1,128,1,float16,fp8,0,0.01741333305835724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,24,24,128,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,24,2,128,1,float16,float16,0,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,24,2,128,1,float16,fp8,0,0.01810666670401891
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,24,2,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,24,4,128,1,float16,float16,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,24,4,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,24,4,128,1,fp8,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,24,1,128,1,float16,float16,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,24,8,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,24,8,128,1,float16,fp8,0,0.018432000031073887
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,24,8,128,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,24,1,128,1,fp8,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,24,1,128,1,float16,float16,0,0.2177706758181254
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,24,1,128,1,float16,fp8,0,0.21742933988571167
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,24,1,128,1,fp8,fp8,0,0.23961599667867026
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,24,2,128,1,float16,fp8,0,0.21742399533589682
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,24,2,128,1,fp8,fp8,0,0.24133867025375366
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,24,4,128,1,float16,float16,0,0.218122661113739
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,24,2,128,1,float16,float16,0,0.2187839945157369
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,24,4,128,1,float16,fp8,0,0.21981332699457803
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,24,4,128,1,fp8,fp8,0,0.24544533093770346
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,24,8,128,1,float16,float16,0,0.22152533133824667
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,24,8,128,1,float16,fp8,0,0.22152533133824667
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,24,8,128,1,fp8,fp8,0,0.24849067131678262
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,24,24,128,1,float16,float16,0,0.1256160040696462
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,24,24,128,1,float16,fp8,0,0.12527466813723245
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,24,1,128,1,float16,fp8,0,0.11707733074824016
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,24,1,128,1,fp8,fp8,0,0.12596266468365988
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,24,2,128,1,float16,float16,0,0.11673600474993388
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,24,2,128,1,float16,fp8,0,0.11638933420181274
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,24,1,128,1,float16,float16,0,0.11741333206494649
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,24,24,128,1,fp8,fp8,0,0.14131200313568115
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,24,2,128,1,fp8,fp8,0,0.12732266386349997
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,24,4,128,1,float16,float16,0,0.11570666233698527
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,24,4,128,1,fp8,fp8,0,0.12800533572832742
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,24,4,128,1,float16,fp8,0,0.11674132943153381
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,24,8,128,1,float16,float16,0,0.11741866668065389
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,24,8,128,1,float16,fp8,0,0.1181066632270813
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,24,1,128,1,float16,float16,0,0.06761600077152252
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,24,1,128,1,float16,fp8,0,0.06656000018119812
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,24,1,128,1,fp8,fp8,0,0.07167466481526692
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,24,24,128,1,float16,fp8,0,0.06960000097751617
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,24,24,128,1,fp8,fp8,0,0.07474133372306824
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,24,8,128,1,fp8,fp8,0,0.13056000073750815
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,24,2,128,1,float16,float16,0,0.06724266707897186
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,24,24,128,1,float16,float16,0,0.07032000025113423
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,24,2,128,1,float16,fp8,0,0.06654933094978333
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,24,2,128,1,fp8,fp8,0,0.07099733253320058
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,24,4,128,1,float16,float16,0,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,24,4,128,1,float16,fp8,0,0.067930668592453
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,24,4,128,1,fp8,fp8,0,0.07133866846561432
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,24,8,128,1,float16,fp8,0,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,24,24,128,1,float16,float16,0,0.04266666869322459
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,24,24,128,1,float16,fp8,0,0.04337066908677419
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,24,24,128,1,fp8,fp8,0,0.04607999821503957
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,24,1,128,1,fp8,fp8,0,0.043696001172065735
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,24,2,128,1,float16,float16,0,0.042992000778516136
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,24,8,128,1,float16,float16,0,0.06828799843788147
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,24,1,128,1,float16,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,24,1,128,1,float16,float16,0,0.043354665239652
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,24,2,128,1,float16,fp8,0,0.043007999658584595
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,24,8,128,1,fp8,fp8,0,0.0727040022611618
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,24,2,128,1,fp8,fp8,0,0.044719999035199486
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,24,4,128,1,float16,float16,0,0.041984001795450844
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,24,4,128,1,float16,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,24,8,128,1,float16,float16,0,0.043354665239652
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,24,8,128,1,float16,fp8,0,0.04267199834187826
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,24,1,128,1,float16,float16,0,0.02867199977238973
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,24,24,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,24,24,128,1,float16,fp8,0,0.029359998802344005
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,24,1,128,1,float16,fp8,0,0.027647999425729115
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,24,1,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,24,4,128,1,fp8,fp8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,24,8,128,1,fp8,fp8,0,0.04505600035190582
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,24,24,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,24,2,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,24,2,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,24,4,128,1,float16,fp8,0,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,24,4,128,1,float16,float16,0,0.0273333340883255
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,24,2,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,24,8,128,1,fp8,fp8,0,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,24,24,128,1,float16,float16,0,0.022885332504908245
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,24,24,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,24,1,128,1,float16,float16,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,24,24,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,24,8,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,24,4,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,24,1,128,1,float16,fp8,0,0.0207893339296182
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,24,2,128,1,float16,float16,0,0.021509334444999695
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,24,1,128,1,fp8,fp8,0,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,24,8,128,1,float16,fp8,0,0.027642667293548584
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,24,2,128,1,float16,fp8,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,24,2,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,24,8,128,1,float16,float16,0,0.02184533327817917
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,24,4,128,1,fp8,fp8,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,24,4,128,1,float16,float16,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,24,8,128,1,float16,fp8,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,24,24,128,1,float16,float16,0,0.017071999609470367
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,24,24,128,1,float16,fp8,0,0.018437333405017853
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,24,4,128,1,float16,fp8,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,24,24,128,1,fp8,fp8,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,24,1,128,1,float16,float16,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,24,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,24,2,128,1,float16,float16,0,0.016751999656359356
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,24,2,128,1,float16,fp8,0,0.018432000031073887
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,24,8,128,1,fp8,fp8,0,0.021157334248224895
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,24,1,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,24,4,128,1,float16,float16,0,0.01740266631046931
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,24,4,128,1,float16,fp8,0,0.016741332908471424
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,24,2,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,24,4,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,24,8,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,24,8,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,24,24,128,1,float16,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,24,24,128,1,float16,float16,0,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,24,24,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,24,1,128,1,float16,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,24,1,128,1,fp8,fp8,0,0.016048000504573185
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,24,2,128,1,float16,float16,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,24,2,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,24,1,128,1,float16,float16,0,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,24,8,128,1,fp8,fp8,0,0.01674666628241539
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,24,4,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,24,4,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,24,8,128,1,float16,float16,0,0.0164533331990242
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,24,2,128,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,24,4,128,1,fp8,fp8,0,0.01570133368174235
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,24,8,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,24,8,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,24,24,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,24,1,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,24,2,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,24,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,24,24,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,24,1,128,1,float16,fp8,0,0.015360000232855478
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,24,4,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,24,4,128,1,float16,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,24,2,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,24,4,128,1,fp8,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,24,8,128,1,float16,float16,0,0.01637866720557213
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,24,2,128,1,float16,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,24,24,128,1,float16,float16,0,0.015029333531856537
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,24,8,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,24,8,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,24,1,128,1,float16,float16,0,0.1890986760457357
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,24,1,128,1,float16,fp8,0,0.19012266397476196
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,24,2,128,1,float16,float16,0,0.1891040007273356
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,24,1,128,1,fp8,fp8,0,0.20991466442743936
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,24,2,128,1,float16,fp8,0,0.19233600298563638
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,24,4,128,1,float16,fp8,0,0.1906399925549825
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,24,4,128,1,float16,float16,0,0.19132266441980997
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,24,2,128,1,fp8,fp8,0,0.20957332849502563
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,24,4,128,1,fp8,fp8,0,0.2099413275718689
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,24,8,128,1,float16,fp8,0,0.19234132766723633
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,24,8,128,1,fp8,fp8,0,0.21333332856496176
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,24,8,128,1,float16,float16,0,0.1914880077044169
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,24,24,128,1,float16,float16,0,0.10752000411351521
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,24,24,128,1,float16,fp8,0,0.1064906617005666
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,24,1,128,1,float16,fp8,0,0.10274133086204529
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,24,24,128,1,fp8,fp8,0,0.11606933673222859
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,24,1,128,1,float16,float16,0,0.10444800059000652
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,24,1,128,1,fp8,fp8,0,0.11331733067830403
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,24,2,128,1,float16,float16,0,0.10481599966684978
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,24,2,128,1,fp8,fp8,0,0.11400000254313152
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,24,4,128,1,float16,fp8,0,0.10444800059000652
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,24,4,128,1,float16,float16,0,0.10309333602587382
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,24,2,128,1,float16,fp8,0,0.10444800059000652
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,24,4,128,1,fp8,fp8,0,0.1129813293615977
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,24,8,128,1,float16,float16,0,0.10513066252072652
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,24,1,128,1,float16,fp8,0,0.06144000093142191
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,24,1,128,1,float16,float16,0,0.060415998101234436
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,24,24,128,1,float16,fp8,0,0.0628053347269694
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,24,8,128,1,float16,fp8,0,0.10478933652242024
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,24,24,128,1,fp8,fp8,0,0.0675786683956782
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,24,24,128,1,float16,float16,0,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,24,8,128,1,fp8,fp8,0,0.11502933502197266
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,24,1,128,1,fp8,fp8,0,0.06553600231806438
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,24,2,128,1,float16,float16,0,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,24,2,128,1,fp8,fp8,0,0.06623466809590657
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,24,2,128,1,float16,fp8,0,0.06075199941794077
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,24,4,128,1,float16,fp8,0,0.062463998794555664
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,24,4,128,1,float16,float16,0,0.061808000008265175
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,24,4,128,1,fp8,fp8,0,0.06588266789913177
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,24,8,128,1,float16,float16,0,0.06211733321348826
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,24,8,128,1,fp8,fp8,0,0.06656000018119812
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,24,24,128,1,float16,float16,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,24,8,128,1,float16,fp8,0,0.062133332093556724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,24,24,128,1,float16,fp8,0,0.039264000952243805
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,24,1,128,1,float16,float16,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,24,1,128,1,float16,fp8,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,24,24,128,1,fp8,fp8,0,0.04128533353408178
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,24,1,128,1,fp8,fp8,0,0.040618665516376495
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,24,2,128,1,float16,fp8,0,0.03754133234421412
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,24,2,128,1,float16,float16,0,0.03754133234421412
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,24,2,128,1,fp8,fp8,0,0.03994133323431015
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,24,4,128,1,float16,float16,0,0.03754133234421412
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,24,4,128,1,float16,fp8,0,0.03718933214743932
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,24,8,128,1,fp8,fp8,0,0.039605334401130676
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,24,4,128,1,fp8,fp8,0,0.04027733455101649
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,24,24,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,24,8,128,1,float16,float16,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,24,8,128,1,float16,fp8,0,0.03719999889532725
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,24,1,128,1,float16,fp8,0,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,24,2,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,24,24,128,1,float16,fp8,0,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,24,1,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,24,1,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,24,24,128,1,fp8,fp8,0,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,24,2,128,1,float16,fp8,0,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,24,2,128,1,fp8,fp8,0,0.02661866694688797
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,24,4,128,1,float16,float16,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,24,4,128,1,float16,fp8,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,24,24,128,1,float16,float16,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,24,8,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,24,24,128,1,float16,fp8,0,0.021168000996112823
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,24,4,128,1,fp8,fp8,0,0.02697066714366277
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,24,8,128,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,24,8,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,24,24,128,1,fp8,fp8,0,0.021168000996112823
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,24,1,128,1,float16,float16,0,0.02048533285657565
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,24,1,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,24,2,128,1,float16,float16,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,24,1,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,24,2,128,1,fp8,fp8,0,0.021168000996112823
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,24,2,128,1,float16,fp8,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,24,4,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,24,4,128,1,fp8,fp8,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,24,4,128,1,float16,float16,0,0.020479999482631683
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,24,8,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,24,8,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,24,8,128,1,fp8,fp8,0,0.02117866774400075
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,24,1,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,24,24,128,1,float16,float16,0,0.01669866715868314
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,24,24,128,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,24,24,128,1,float16,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,24,1,128,1,fp8,fp8,0,0.01740266631046931
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,24,2,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,24,2,128,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,24,4,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,24,2,128,1,float16,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,24,4,128,1,float16,float16,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,24,8,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,24,4,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,24,8,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,24,1,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,24,8,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,24,24,128,1,float16,float16,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,24,24,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,24,1,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,24,1,128,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,24,24,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,24,1,128,1,float16,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,24,2,128,1,float16,fp8,0,0.016714667280515034
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,24,2,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,24,4,128,1,float16,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,24,4,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,24,4,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,24,8,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,24,8,128,1,float16,fp8,0,0.016714667280515034
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,24,8,128,1,fp8,fp8,0,0.016704000532627106
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,24,2,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,24,24,128,1,float16,float16,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,24,24,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,24,24,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,24,1,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,24,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,24,2,128,1,float16,float16,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,24,1,128,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,24,2,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,24,4,128,1,float16,float16,0,0.015354666858911514
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,24,8,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,24,4,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,24,8,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,24,4,128,1,fp8,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,24,2,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,24,8,128,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,16,1,128,1,float16,fp8,0,7.778634389241536
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,16,1,128,1,fp8,fp8,0,6.056618372599284
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,16,1,128,1,float16,float16,0,7.49842643737793
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,16,2,128,1,fp8,fp8,0,6.077599843343099
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,16,2,128,1,float16,fp8,0,7.759877522786458
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,16,2,128,1,float16,float16,0,7.564287821451823
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,16,4,128,1,float16,fp8,0,7.72984504699707
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,16,4,128,1,float16,float16,0,7.598762512207031
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,16,1,128,1,float16,float16,0,3.705530802408854
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,16,4,128,1,fp8,fp8,0,6.1032053629557295
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,16,16,128,1,float16,float16,0,3.7466506958007812
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,16,16,128,1,float16,fp8,0,3.8770507176717124
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,16,1,128,1,float16,fp8,0,3.630762736002604
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,16,16,128,1,fp8,fp8,0,3.2414401372273765
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,16,8,128,1,float16,float16,0,7.967215855916341
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,16,8,128,1,float16,fp8,0,7.734272003173828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,16,8,128,1,fp8,fp8,0,6.165168126424153
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,16,1,128,1,fp8,fp8,0,3.1322399775187173
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,16,2,128,1,float16,fp8,0,3.7616745630900064
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,16,2,128,1,float16,float16,0,3.6724160512288413
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,16,2,128,1,fp8,fp8,0,3.1312214533487954
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,16,4,128,1,float16,float16,0,3.804346720377604
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,16,4,128,1,float16,fp8,0,3.6601174672444663
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,16,4,128,1,fp8,fp8,0,3.143360137939453
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,16,1,128,1,float16,float16,0,1.9157333374023438
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,16,8,128,1,float16,fp8,0,3.847349484761556
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,16,8,128,1,float16,float16,0,3.7476800282796225
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,16,16,128,1,float16,float16,0,1.96014404296875
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,16,1,128,1,float16,fp8,0,1.914021333058675
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,16,16,128,1,float16,fp8,0,1.9711947441101074
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,16,8,128,1,fp8,fp8,0,3.181088129679362
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,16,1,128,1,fp8,fp8,0,1.6760907173156738
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,16,16,128,1,fp8,fp8,0,1.7300480206807454
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,16,2,128,1,float16,float16,0,1.909274737040202
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,16,2,128,1,float16,fp8,0,1.9095946947733562
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,16,2,128,1,fp8,fp8,0,1.6779999732971191
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,16,4,128,1,float16,fp8,0,1.924272060394287
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,16,4,128,1,float16,float16,0,1.922218640645345
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,16,4,128,1,fp8,fp8,0,1.682773272196452
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,16,1,128,1,float16,float16,0,1.0559093157450359
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,16,16,128,1,float16,float16,0,1.0781013170878093
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,16,8,128,1,float16,float16,0,1.940992037455241
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,16,16,128,1,float16,fp8,0,1.0873119831085205
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,16,8,128,1,fp8,fp8,0,1.6991573969523113
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,16,16,128,1,fp8,fp8,0,0.9726346333821615
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,16,1,128,1,float16,fp8,0,1.0654773712158203
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,16,8,128,1,float16,fp8,0,1.9647146860758464
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,16,1,128,1,fp8,fp8,0,0.9470293521881104
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,16,2,128,1,float16,fp8,0,1.0644480387369792
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,16,2,128,1,float16,float16,0,1.0579626560211182
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,16,2,128,1,fp8,fp8,0,0.9494240283966064
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,16,4,128,1,float16,float16,0,1.0627413590749104
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,16,4,128,1,float16,fp8,0,1.0675040086110432
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,16,4,128,1,fp8,fp8,0,0.9524853229522705
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,16,8,128,1,float16,float16,0,1.082533359527588
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,16,8,128,1,float16,fp8,0,1.0757173697153728
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,16,8,128,1,fp8,fp8,0,0.9617066383361816
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,16,1,128,1,float16,float16,0,4.415856043497722
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,16,1,128,1,float16,fp8,0,4.464986801147461
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,16,1,128,1,fp8,fp8,0,3.6539732615152993
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,16,2,128,1,float16,float16,0,4.457146644592285
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,16,2,128,1,float16,fp8,0,4.46175479888916
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,16,2,128,1,fp8,fp8,0,3.6648906071980796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,16,4,128,1,float16,float16,0,4.3694454828898115
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,16,4,128,1,float16,fp8,0,4.272480010986328
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,16,1,128,1,float16,float16,0,2.176682631174723
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,16,4,128,1,fp8,fp8,0,3.683157285054525
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,16,16,128,1,float16,float16,0,2.2710612614949546
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,16,16,128,1,float16,fp8,0,2.291541258494059
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,16,8,128,1,float16,float16,0,4.336314519246419
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,16,1,128,1,float16,fp8,0,2.1797547340393066
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,16,16,128,1,fp8,fp8,0,1.9817867279052734
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,16,8,128,1,float16,fp8,0,4.603061358133952
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,16,8,128,1,fp8,fp8,0,3.7292372385660806
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,16,1,128,1,fp8,fp8,0,1.904095967610677
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,16,2,128,1,float16,float16,0,2.1961280504862466
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,16,2,128,1,float16,fp8,0,2.205018679300944
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,16,2,128,1,fp8,fp8,0,1.9082187016805012
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,16,4,128,1,float16,float16,0,2.192725340525309
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,16,4,128,1,fp8,fp8,0,1.9198133150736492
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,16,4,128,1,float16,fp8,0,2.1961386998494468
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,16,8,128,1,float16,float16,0,2.2084320386250815
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,16,1,128,1,float16,float16,0,1.1566027005513508
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,16,8,128,1,float16,fp8,0,2.2097813288370767
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,16,1,128,1,float16,fp8,0,1.1651360193888347
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,16,16,128,1,float16,float16,0,1.2144640286763508
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,16,8,128,1,fp8,fp8,0,1.9403200149536133
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,16,16,128,1,float16,fp8,0,1.2076319853464763
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,16,1,128,1,fp8,fp8,0,1.0347519715627034
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,16,16,128,1,fp8,fp8,0,1.0760587056477864
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,16,2,128,1,float16,float16,0,1.1603573163350422
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,16,2,128,1,float16,fp8,0,1.163434664408366
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,16,2,128,1,fp8,fp8,0,1.036463975906372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,16,4,128,1,float16,float16,0,1.18340269724528
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,16,4,128,1,float16,fp8,0,1.1844159762064617
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,16,4,128,1,fp8,fp8,0,1.0437973340352376
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,16,8,128,1,float16,float16,0,1.182037353515625
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,16,16,128,1,float16,float16,0,0.6789173285166422
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,16,8,128,1,float16,fp8,0,1.1868213017781575
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,16,16,128,1,float16,fp8,0,0.6853919823964437
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,16,8,128,1,fp8,fp8,0,1.0542079607645671
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,16,16,128,1,fp8,fp8,0,0.6215680042902628
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,16,1,128,1,float16,float16,0,0.659114678700765
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,16,1,128,1,float16,fp8,0,0.6570666631062826
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,16,1,128,1,fp8,fp8,0,0.6021120150883993
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,16,2,128,1,float16,fp8,0,0.6594560146331787
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,16,2,128,1,float16,float16,0,0.6577440102895101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,16,2,128,1,fp8,fp8,0,0.6021066506703695
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,16,4,128,1,float16,float16,0,0.6642346779505411
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,16,4,128,1,fp8,fp8,0,0.605183998743693
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,16,4,128,1,float16,fp8,0,0.6720853646596273
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,16,8,128,1,float16,float16,0,0.6707039674123129
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,16,8,128,1,float16,fp8,0,0.6744746367136637
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,16,8,128,1,fp8,fp8,0,0.612010677655538
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,16,1,128,1,float16,float16,0,3.0325705210367837
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,16,1,128,1,fp8,fp8,0,2.6675198872884116
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,16,1,128,1,float16,fp8,0,3.076629320780436
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,16,2,128,1,float16,float16,0,3.097781181335449
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,16,2,128,1,float16,fp8,0,3.121679941813151
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,16,2,128,1,fp8,fp8,0,2.6726293563842773
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,16,4,128,1,float16,float16,0,3.119285265604655
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,16,4,128,1,float16,fp8,0,3.1602560679117837
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,16,1,128,1,float16,float16,0,1.5866880416870117
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,16,4,128,1,fp8,fp8,0,2.690757433573405
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,16,16,128,1,float16,fp8,0,1.6590453783671062
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,16,16,128,1,float16,float16,0,1.66758394241333
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,16,1,128,1,float16,fp8,0,1.5907840728759766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,16,8,128,1,float16,float16,0,3.1650241216023765
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,16,8,128,1,float16,fp8,0,3.1274665196736655
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,16,16,128,1,fp8,fp8,0,1.469098726908366
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,16,8,128,1,fp8,fp8,0,2.7345972061157227
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,16,1,128,1,fp8,fp8,0,1.4011732737223308
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,16,2,128,1,float16,float16,0,1.5853226979573567
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,16,2,128,1,float16,fp8,0,1.6095520655314128
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,16,2,128,1,fp8,fp8,0,1.4069813092549641
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,16,4,128,1,float16,float16,0,1.5894187291463215
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,16,4,128,1,float16,fp8,0,1.6023893356323242
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,16,4,128,1,fp8,fp8,0,1.4127786954243977
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,16,8,128,1,float16,float16,0,1.6296960512797039
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,16,1,128,1,float16,float16,0,0.854698657989502
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,16,16,128,1,float16,float16,0,0.8966773351033529
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,16,8,128,1,float16,fp8,0,1.6170719464619954
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,16,1,128,1,float16,fp8,0,0.8564106623331705
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,16,16,128,1,float16,fp8,0,0.8959999879201254
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,16,8,128,1,fp8,fp8,0,1.4332586924235027
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,16,1,128,1,fp8,fp8,0,0.7727946440378824
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,16,16,128,1,fp8,fp8,0,0.8040106296539307
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,16,2,128,1,float16,float16,0,0.8584746519724528
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,16,2,128,1,float16,fp8,0,0.8690293629964193
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,16,2,128,1,fp8,fp8,0,0.7751839955647787
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,16,4,128,1,float16,float16,0,0.8635733127593994
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,16,4,128,1,float16,fp8,0,0.8659626642862955
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,16,4,128,1,fp8,fp8,0,0.7778986295064291
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,16,8,128,1,float16,float16,0,0.8792853355407715
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,16,1,128,1,float16,float16,0,0.4906719923019409
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,16,8,128,1,float16,fp8,0,0.8741652965545654
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,16,8,128,1,fp8,fp8,0,0.7869439919789633
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,16,1,128,1,float16,fp8,0,0.49169600009918213
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,16,16,128,1,float16,float16,0,0.5155839920043945
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,16,16,128,1,float16,fp8,0,0.5183146794637045
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,16,16,128,1,fp8,fp8,0,0.4739360014597575
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,16,1,128,1,fp8,fp8,0,0.4537973403930664
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,16,2,128,1,float16,float16,0,0.4916906754175822
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,16,2,128,1,float16,fp8,0,0.49511468410491943
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,16,2,128,1,fp8,fp8,0,0.45585068066914874
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,16,4,128,1,float16,float16,0,0.4968106746673584
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,16,4,128,1,fp8,fp8,0,0.46030934651692706
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,16,4,128,1,float16,fp8,0,0.5025920073191324
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,16,8,128,1,float16,float16,0,0.5053439935048422
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,16,8,128,1,float16,fp8,0,0.5108053286870321
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,16,8,128,1,fp8,fp8,0,0.4657493432362874
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,16,1,128,1,float16,fp8,0,4.2103627522786455
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,16,1,128,1,fp8,fp8,0,3.5669174194335938
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,16,1,128,1,float16,float16,0,4.130826632181804
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,16,2,128,1,float16,fp8,0,4.26531187693278
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,16,2,128,1,float16,float16,0,4.174869219462077
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,16,2,128,1,fp8,fp8,0,3.5819520950317383
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,16,4,128,1,float16,fp8,0,4.197733243306478
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,16,4,128,1,float16,float16,0,4.15229860941569
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,16,1,128,1,float16,float16,0,2.0910186767578125
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,16,4,128,1,fp8,fp8,0,3.6051626205444336
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,16,16,128,1,float16,float16,0,2.166447957356771
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,16,16,128,1,float16,fp8,0,2.1917014122009277
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,16,1,128,1,float16,fp8,0,2.0913546880086265
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,16,16,128,1,fp8,fp8,0,1.9471626281738281
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,16,8,128,1,float16,float16,0,4.3852799733479815
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,16,8,128,1,float16,fp8,0,4.371285438537598
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,16,1,128,1,fp8,fp8,0,1.8317653338114421
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,16,8,128,1,fp8,fp8,0,3.682821273803711
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,16,2,128,1,float16,fp8,0,2.0776960055033364
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,16,2,128,1,fp8,fp8,0,1.842517375946045
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,16,2,128,1,float16,float16,0,2.0718933741251626
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,16,4,128,1,float16,float16,0,2.0964694023132324
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,16,4,128,1,fp8,fp8,0,1.856170654296875
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,16,4,128,1,float16,fp8,0,2.0899786949157715
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,16,8,128,1,float16,float16,0,2.123429298400879
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,16,1,128,1,float16,fp8,0,1.0852693716684978
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,16,1,128,1,float16,float16,0,1.0808266798655193
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,16,8,128,1,float16,fp8,0,2.156874656677246
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,16,16,128,1,float16,float16,0,1.145690679550171
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,16,16,128,1,float16,fp8,0,1.1439733505249023
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,16,8,128,1,fp8,fp8,0,1.8877439498901367
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,16,16,128,1,fp8,fp8,0,1.0310133298238118
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,16,1,128,1,fp8,fp8,0,0.976037343343099
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,16,2,128,1,float16,fp8,0,1.087999979654948
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,16,2,128,1,float16,float16,0,1.0900800228118896
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,16,2,128,1,fp8,fp8,0,0.9832320213317871
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,16,4,128,1,float16,float16,0,1.1054080327351887
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,16,4,128,1,float16,fp8,0,1.0958507061004639
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,16,4,128,1,fp8,fp8,0,0.9859413305918375
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,16,1,128,1,float16,float16,0,0.5922133525212606
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,16,16,128,1,float16,float16,0,0.6227573156356812
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,16,8,128,1,float16,float16,0,1.1129120190938313
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,16,8,128,1,float16,fp8,0,1.1159946918487549
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,16,8,128,1,fp8,fp8,0,1.0026720364888508
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,16,16,128,1,fp8,fp8,0,0.571733315785726
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,16,1,128,1,float16,fp8,0,0.5952853361765543
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,16,16,128,1,float16,fp8,0,0.6347093184789022
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,16,1,128,1,fp8,fp8,0,0.5468159914016724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,16,2,128,1,float16,float16,0,0.5969866514205933
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,16,2,128,1,float16,fp8,0,0.5990399916966757
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,16,2,128,1,fp8,fp8,0,0.5481866598129272
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,16,4,128,1,float16,float16,0,0.6014293432235718
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,16,4,128,1,float16,fp8,0,0.6082559823989868
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,16,4,128,1,fp8,fp8,0,0.5519306659698486
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,16,8,128,1,float16,fp8,0,0.6130346854527792
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,16,16,128,1,float16,float16,0,0.3705173333485921
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,16,8,128,1,float16,float16,0,0.6161119937896729
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,16,8,128,1,fp8,fp8,0,0.5594559907913208
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,16,1,128,1,float16,float16,0,0.35447466373443604
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,16,1,128,1,float16,fp8,0,0.3541333278020223
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,16,16,128,1,float16,fp8,0,0.37461332480112713
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,16,16,128,1,fp8,fp8,0,0.3466240167617798
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,16,1,128,1,fp8,fp8,0,0.32784533500671387
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,16,2,128,1,float16,float16,0,0.35310399532318115
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,16,2,128,1,fp8,fp8,0,0.3295573393503825
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,16,4,128,1,float16,fp8,0,0.3596106767654419
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,16,4,128,1,float16,float16,0,0.3561813433965047
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,16,4,128,1,fp8,fp8,0,0.33398934205373126
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,16,2,128,1,float16,fp8,0,0.355840007464091
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,16,8,128,1,float16,float16,0,0.36163731416066486
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,16,8,128,1,float16,fp8,0,0.36505599816640216
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,16,8,128,1,fp8,fp8,0,0.33843199412027997
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,16,1,128,1,float16,float16,0,2.4937493006388345
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,16,1,128,1,fp8,fp8,0,2.234538714090983
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,16,1,128,1,float16,fp8,0,2.4927573204040527
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,16,2,128,1,float16,float16,0,2.5115307172139487
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,16,2,128,1,fp8,fp8,0,2.246826648712158
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,16,2,128,1,float16,fp8,0,2.5383307139078775
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,16,4,128,1,float16,float16,0,2.5209172566731772
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,16,4,128,1,float16,fp8,0,2.535594622294108
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,16,1,128,1,float16,float16,0,1.2852906386057537
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,16,4,128,1,fp8,fp8,0,2.2649173736572266
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,16,16,128,1,float16,float16,0,1.3540639877319336
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,16,1,128,1,float16,fp8,0,1.2880319754282634
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,16,16,128,1,float16,fp8,0,1.391599973042806
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,16,8,128,1,float16,float16,0,2.594303925832113
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,16,16,128,1,fp8,fp8,0,1.244165341059367
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,16,8,128,1,fp8,fp8,0,2.316293398539225
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,16,8,128,1,float16,fp8,0,2.5789440472920737
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,16,1,128,1,fp8,fp8,0,1.1583147048950195
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,16,2,128,1,float16,float16,0,1.282901366551717
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,16,2,128,1,float16,fp8,0,1.292464017868042
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,16,2,128,1,fp8,fp8,0,1.1668480237325032
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,16,4,128,1,float16,float16,0,1.3096906344095867
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,16,4,128,1,float16,fp8,0,1.2975786526997883
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,16,4,128,1,fp8,fp8,0,1.1743573347727458
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,16,8,128,1,float16,float16,0,1.3209599653879802
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,16,8,128,1,float16,fp8,0,1.3274346987406414
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,16,1,128,1,float16,float16,0,0.6789173285166422
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,16,1,128,1,float16,fp8,0,0.6823253631591797
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,16,16,128,1,float16,float16,0,0.7222613493601481
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,16,8,128,1,fp8,fp8,0,1.1991146405537922
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,16,1,128,1,fp8,fp8,0,0.6251680056254069
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,16,2,128,1,float16,float16,0,0.6843732992808024
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,16,16,128,1,fp8,fp8,0,0.6652586857477824
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,16,2,128,1,float16,fp8,0,0.6840373675028483
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,16,16,128,1,float16,fp8,0,0.7376266320546468
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,16,2,128,1,fp8,fp8,0,0.626853346824646
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,16,4,128,1,float16,float16,0,0.6925600369771322
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,16,4,128,1,float16,fp8,0,0.6918826897939047
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,16,4,128,1,fp8,fp8,0,0.6326666673024496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,16,16,128,1,float16,fp8,0,0.40994131565093994
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,16,1,128,1,float16,float16,0,0.38042132059733075
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,16,8,128,1,float16,float16,0,0.7000532944997152
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,16,8,128,1,float16,fp8,0,0.7089493274688721
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,16,16,128,1,float16,float16,0,0.40721599260965985
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,16,8,128,1,fp8,fp8,0,0.6439040104548136
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,16,16,128,1,fp8,fp8,0,0.37836798032124835
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,16,1,128,1,float16,fp8,0,0.3824746608734131
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,16,1,128,1,fp8,fp8,0,0.3575413227081299
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,16,2,128,1,float16,float16,0,0.3821226755777995
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,16,2,128,1,float16,fp8,0,0.3831466833750407
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,16,2,128,1,fp8,fp8,0,0.3595893383026123
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,16,4,128,1,float16,fp8,0,0.388810674349467
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,16,4,128,1,fp8,fp8,0,0.3619840145111084
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,16,4,128,1,float16,float16,0,0.3914986848831177
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,16,8,128,1,float16,float16,0,0.39424534638722736
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,16,16,128,1,float16,float16,0,0.2491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,16,1,128,1,float16,float16,0,0.2362026572227478
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,16,16,128,1,float16,fp8,0,0.2515573302904765
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,16,8,128,1,float16,fp8,0,0.3983360131581624
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,16,8,128,1,fp8,fp8,0,0.3677866856257121
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,16,1,128,1,fp8,fp8,0,0.2239146629969279
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,16,16,128,1,fp8,fp8,0,0.23517866929372153
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,16,1,128,1,float16,fp8,0,0.23824532826741537
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,16,2,128,1,float16,fp8,0,0.23654399315516153
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,16,2,128,1,float16,float16,0,0.23858133951822916
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,16,2,128,1,fp8,fp8,0,0.22220800320307413
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,16,4,128,1,fp8,fp8,0,0.2239146629969279
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,16,4,128,1,float16,float16,0,0.23757332563400269
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,16,4,128,1,float16,fp8,0,0.2392746607462565
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,16,8,128,1,float16,float16,0,0.24132800102233887
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,16,8,128,1,fp8,fp8,0,0.2283573349316915
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,16,8,128,1,float16,fp8,0,0.24269866943359375
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,16,1,128,1,float16,fp8,0,2.6021599769592285
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,16,1,128,1,fp8,fp8,0,2.3761919339497886
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,16,1,128,1,float16,float16,0,2.584442615509033
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,16,2,128,1,float16,float16,0,2.6028587023417153
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,16,2,128,1,float16,fp8,0,2.593957265218099
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,16,2,128,1,fp8,fp8,0,2.400426705678304
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,16,4,128,1,float16,float16,0,2.624512036641439
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,16,4,128,1,float16,fp8,0,2.663088003794352
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,16,1,128,1,float16,float16,0,1.3264266649882
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,16,4,128,1,fp8,fp8,0,2.4263787269592285
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,16,16,128,1,float16,float16,0,1.405610720316569
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,16,16,128,1,float16,fp8,0,1.4206453959147136
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,16,1,128,1,float16,fp8,0,1.3257439931233723
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,16,16,128,1,fp8,fp8,0,1.3254026571909587
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,16,8,128,1,float16,float16,0,2.679445266723633
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,16,8,128,1,float16,fp8,0,2.6866188049316406
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,16,8,128,1,fp8,fp8,0,2.488149325052897
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,16,1,128,1,fp8,fp8,0,1.2175359725952148
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,16,2,128,1,float16,float16,0,1.3097013632456462
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,16,2,128,1,float16,fp8,0,1.3100266456604004
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,16,2,128,1,fp8,fp8,0,1.228122631708781
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,16,4,128,1,float16,float16,0,1.328111966451009
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,16,4,128,1,float16,fp8,0,1.328816016515096
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,16,4,128,1,fp8,fp8,0,1.2315306663513184
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,16,8,128,1,float16,float16,0,1.3602186838785808
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,16,8,128,1,float16,fp8,0,1.3765974044799805
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,16,1,128,1,float16,float16,0,0.683690627415975
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,16,1,128,1,float16,fp8,0,0.6840319633483887
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,16,16,128,1,float16,float16,0,0.7311360041300455
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,16,8,128,1,fp8,fp8,0,1.2661759853363037
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,16,1,128,1,fp8,fp8,0,0.6418720086415609
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,16,16,128,1,float16,fp8,0,0.7512693405151367
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,16,16,128,1,fp8,fp8,0,0.6980319817860922
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,16,2,128,1,float16,float16,0,0.6997173627217611
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,16,2,128,1,float16,fp8,0,0.6884693304697672
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,16,2,128,1,fp8,fp8,0,0.6418826580047607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,16,4,128,1,float16,float16,0,0.6922240257263184
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,16,4,128,1,float16,fp8,0,0.6939307053883871
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,16,4,128,1,fp8,fp8,0,0.6493813196818033
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,16,16,128,1,float16,float16,0,0.40004265308380127
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,16,1,128,1,float16,float16,0,0.3705226580301921
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,16,16,128,1,float16,fp8,0,0.40345601240793866
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,16,8,128,1,float16,fp8,0,0.7137227058410645
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,16,8,128,1,fp8,fp8,0,0.6673173109690348
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,16,16,128,1,fp8,fp8,0,0.3811039924621582
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,16,1,128,1,float16,fp8,0,0.3715413411458333
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,16,8,128,1,float16,float16,0,0.7157759666442871
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,16,1,128,1,fp8,fp8,0,0.35140268007914227
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,16,2,128,1,float16,float16,0,0.3739253282546997
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,16,2,128,1,float16,fp8,0,0.3749493360519409
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,16,2,128,1,fp8,fp8,0,0.3558453321456909
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,16,4,128,1,float16,fp8,0,0.3804159959157308
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,16,4,128,1,float16,float16,0,0.3800693353017171
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,16,4,128,1,fp8,fp8,0,0.3561813433965047
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,16,8,128,1,float16,float16,0,0.3896373510360718
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,16,1,128,1,float16,fp8,0,0.21846399704615274
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,16,1,128,1,float16,float16,0,0.21708800395329794
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,16,8,128,1,float16,fp8,0,0.3887786865234375
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,16,16,128,1,float16,fp8,0,0.23688532908757529
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,16,16,128,1,float16,float16,0,0.23586134115854898
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,16,16,128,1,fp8,fp8,0,0.22289599974950156
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,16,8,128,1,fp8,fp8,0,0.368122657140096
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,16,2,128,1,float16,fp8,0,0.2194719910621643
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,16,2,128,1,float16,float16,0,0.21845332781473795
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,16,1,128,1,fp8,fp8,0,0.20377600193023682
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,16,2,128,1,fp8,fp8,0,0.20651199420293173
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,16,4,128,1,float16,float16,0,0.2208426594734192
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,16,4,128,1,float16,fp8,0,0.22254933913548788
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,16,4,128,1,fp8,fp8,0,0.21094399690628052
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,16,8,128,1,float16,float16,0,0.22698666652043661
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,16,8,128,1,fp8,fp8,0,0.2167466680208842
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,16,8,128,1,float16,fp8,0,0.2290346622467041
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,16,1,128,1,float16,float16,0,0.14729066689809164
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,16,1,128,1,float16,fp8,0,0.14591999848683676
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,16,16,128,1,float16,fp8,0,0.15171200037002563
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,16,1,128,1,fp8,fp8,0,0.13874666889508566
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,16,16,128,1,float16,float16,0,0.15123200416564941
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,16,16,128,1,fp8,fp8,0,0.1431893308957418
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,16,2,128,1,float16,float16,0,0.14574933052062988
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,16,2,128,1,float16,fp8,0,0.14763200283050537
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,16,2,128,1,fp8,fp8,0,0.1384106675783793
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,16,4,128,1,float16,float16,0,0.1467680037021637
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,16,4,128,1,float16,fp8,0,0.1476426621278127
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,16,4,128,1,fp8,fp8,0,0.13909332950909933
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,16,8,128,1,fp8,fp8,0,0.14096533258756003
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,16,8,128,1,float16,fp8,0,0.14779200156529745
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,16,8,128,1,float16,float16,0,0.14793599645296732
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,16,1,128,1,float16,float16,0,1.6889173189798992
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,16,1,128,1,float16,fp8,0,1.6786719957987468
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,16,1,128,1,fp8,fp8,0,1.5965867042541504
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,16,2,128,1,float16,float16,0,1.6875519752502441
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,16,2,128,1,fp8,fp8,0,1.6112640698750813
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,16,2,128,1,float16,fp8,0,1.6899466514587402
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,16,4,128,1,float16,float16,0,1.7143467267354329
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,16,4,128,1,float16,fp8,0,1.7348267237345378
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,16,1,128,1,float16,float16,0,0.8707413673400879
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,16,4,128,1,fp8,fp8,0,1.6341333389282227
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,16,16,128,1,float16,float16,0,0.9323466618855795
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,16,16,128,1,float16,fp8,0,0.9395306905110677
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,16,8,128,1,float16,float16,0,1.7477973302205403
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,16,8,128,1,float16,fp8,0,1.7553013165791829
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,16,16,128,1,fp8,fp8,0,0.9035092989603678
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,16,1,128,1,float16,fp8,0,0.869376023610433
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,16,1,128,1,fp8,fp8,0,0.8231200377146403
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,16,8,128,1,fp8,fp8,0,1.6774826049804688
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,16,2,128,1,float16,float16,0,0.8663146495819092
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,16,2,128,1,float16,fp8,0,0.8669919967651367
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,16,2,128,1,fp8,fp8,0,0.8241600195566813
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,16,4,128,1,float16,float16,0,0.8765386740366617
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,16,4,128,1,float16,fp8,0,0.879957358042399
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,16,4,128,1,fp8,fp8,0,0.8403626283009847
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,16,8,128,1,float16,fp8,0,0.8997546831766764
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,16,8,128,1,float16,float16,0,0.899072011311849
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,16,16,128,1,float16,float16,0,0.4913546641667684
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,16,1,128,1,float16,float16,0,0.45892266432444256
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,16,1,128,1,float16,fp8,0,0.4596266746520996
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,16,8,128,1,fp8,fp8,0,0.8601653575897217
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,16,1,128,1,fp8,fp8,0,0.43537068367004395
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,16,16,128,1,float16,fp8,0,0.4957866668701172
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,16,2,128,1,float16,float16,0,0.4609760046005249
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,16,16,128,1,fp8,fp8,0,0.47599466641743976
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,16,2,128,1,float16,fp8,0,0.45823999245961505
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,16,2,128,1,fp8,fp8,0,0.44151465098063153
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,16,4,128,1,float16,fp8,0,0.46745598316192627
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,16,4,128,1,fp8,fp8,0,0.44390400250752765
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,16,4,128,1,float16,float16,0,0.46984533468882245
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,16,8,128,1,float16,float16,0,0.47189335028330487
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,16,8,128,1,float16,fp8,0,0.4821386734644572
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,16,8,128,1,fp8,fp8,0,0.4551733334859212
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,16,16,128,1,float16,float16,0,0.2732479969660441
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,16,1,128,1,float16,float16,0,0.2532693346341451
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,16,16,128,1,float16,fp8,0,0.2769706646601359
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,16,1,128,1,float16,fp8,0,0.25326399008433026
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,16,16,128,1,fp8,fp8,0,0.2657279968261719
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,16,1,128,1,fp8,fp8,0,0.2426933248837789
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,16,2,128,1,fp8,fp8,0,0.24541334311167398
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,16,2,128,1,float16,float16,0,0.2556533416112264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,16,2,128,1,float16,fp8,0,0.2539520064989726
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,16,4,128,1,float16,float16,0,0.25804799795150757
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,16,4,128,1,float16,fp8,0,0.25993067026138306
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,16,4,128,1,fp8,fp8,0,0.2512213389078776
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,16,1,128,1,float16,float16,0,0.15103999773661295
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,16,8,128,1,float16,float16,0,0.26436267296473187
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,16,16,128,1,float16,float16,0,0.16127999623616537
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,16,16,128,1,float16,fp8,0,0.16503467162450156
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,16,16,128,1,fp8,fp8,0,0.15940266847610474
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,16,8,128,1,float16,fp8,0,0.2664159933725993
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,16,8,128,1,fp8,fp8,0,0.2539520064989726
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,16,1,128,1,fp8,fp8,0,0.14267733693122864
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,16,2,128,1,float16,float16,0,0.1508693297704061
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,16,1,128,1,float16,fp8,0,0.15223999818166098
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,16,4,128,1,float16,float16,0,0.1532586713631948
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,16,2,128,1,fp8,fp8,0,0.1423466702302297
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,16,2,128,1,float16,fp8,0,0.15104533235232034
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,16,4,128,1,float16,fp8,0,0.15530666708946228
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,16,4,128,1,fp8,fp8,0,0.1454080045223236
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,16,8,128,1,float16,float16,0,0.15428266922632852
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,16,1,128,1,float16,float16,0,0.10274133086204529
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,16,8,128,1,float16,fp8,0,0.1566933294137319
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,16,16,128,1,float16,float16,0,0.10410666465759277
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,16,16,128,1,float16,fp8,0,0.10717866818110149
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,16,16,128,1,fp8,fp8,0,0.10070400436719258
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,16,1,128,1,fp8,fp8,0,0.09762133161226909
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,16,1,128,1,float16,fp8,0,0.1037600040435791
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,16,2,128,1,float16,float16,0,0.10341333349545796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,16,8,128,1,fp8,fp8,0,0.1530933380126953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,16,2,128,1,float16,fp8,0,0.10376532872517903
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,16,2,128,1,fp8,fp8,0,0.09796266754468282
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,16,4,128,1,float16,float16,0,0.10240000486373901
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,16,4,128,1,fp8,fp8,0,0.09796266754468282
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,16,4,128,1,float16,fp8,0,0.10274133086204529
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,16,8,128,1,float16,float16,0,0.10307733217875163
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,16,8,128,1,float16,fp8,0,0.10547733306884766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,16,8,128,1,fp8,fp8,0,0.09898133079210918
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,16,1,128,1,float16,float16,0,1.956698735555013
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,16,1,128,1,float16,fp8,0,1.9553279876708984
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,16,1,128,1,fp8,fp8,0,1.90446933110555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,16,2,128,1,float16,float16,0,1.9807573954264324
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,16,2,128,1,float16,fp8,0,1.9783679644266765
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,16,2,128,1,fp8,fp8,0,1.9215359687805176
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,16,4,128,1,float16,float16,0,1.9838347434997559
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,16,4,128,1,float16,fp8,0,2.0145546595255532
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,16,1,128,1,float16,float16,0,0.9907253583272299
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,16,4,128,1,fp8,fp8,0,1.9522612889607747
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,16,16,128,1,float16,float16,0,1.0712746779123943
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,16,16,128,1,float16,fp8,0,1.0746880372365315
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,16,1,128,1,float16,fp8,0,0.9917439619700114
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,16,8,128,1,float16,float16,0,2.0616532961527505
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,16,16,128,1,fp8,fp8,0,1.0637653668721516
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,16,8,128,1,float16,fp8,0,2.0616532961527505
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,16,1,128,1,fp8,fp8,0,0.9644373257954916
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,16,8,128,1,fp8,fp8,0,2.022058645884196
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,16,2,128,1,float16,float16,0,0.989354689915975
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,16,2,128,1,float16,fp8,0,0.9859413305918375
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,16,2,128,1,fp8,fp8,0,0.9777812957763672
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,16,4,128,1,float16,float16,0,0.9954986572265625
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,16,4,128,1,float16,fp8,0,0.9958399931589762
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,16,4,128,1,fp8,fp8,0,0.9794560273488363
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,16,8,128,1,float16,float16,0,1.0282506942749023
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,16,8,128,1,float16,fp8,0,1.037829319636027
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,16,1,128,1,float16,float16,0,0.504314661026001
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,16,1,128,1,float16,fp8,0,0.5073920090993246
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,16,16,128,1,float16,float16,0,0.5505706469217936
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,16,8,128,1,fp8,fp8,0,1.017685333887736
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,16,1,128,1,fp8,fp8,0,0.5009066661198934
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,16,2,128,1,float16,float16,0,0.5094399849573771
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,16,16,128,1,fp8,fp8,0,0.5499093135197958
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,16,16,128,1,float16,fp8,0,0.5618346532185873
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,16,2,128,1,float16,fp8,0,0.5104586680730184
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,16,2,128,1,fp8,fp8,0,0.5012480020523071
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,16,4,128,1,float16,float16,0,0.5131839911142985
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,16,4,128,1,float16,fp8,0,0.5183146794637045
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,16,4,128,1,fp8,fp8,0,0.509770671526591
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,16,16,128,1,float16,float16,0,0.2961066762606303
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,16,1,128,1,float16,float16,0,0.27050666014353436
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,16,16,128,1,float16,fp8,0,0.29814932743708294
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,16,16,128,1,fp8,fp8,0,0.2950826684633891
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,16,8,128,1,float16,fp8,0,0.5318026542663574
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,16,8,128,1,float16,float16,0,0.5321386655171713
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,16,1,128,1,float16,fp8,0,0.2705013354619344
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,16,8,128,1,fp8,fp8,0,0.5290720065434774
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,16,1,128,1,fp8,fp8,0,0.26709334055582684
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,16,2,128,1,float16,float16,0,0.27187200387318927
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,16,2,128,1,float16,fp8,0,0.2725386619567871
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,16,2,128,1,fp8,fp8,0,0.2701653242111206
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,16,4,128,1,float16,float16,0,0.2762986620267232
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,16,4,128,1,float16,fp8,0,0.2773333390553792
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,16,4,128,1,fp8,fp8,0,0.27187200387318927
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,16,1,128,1,float16,float16,0,0.14830399552981058
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,16,8,128,1,float16,float16,0,0.2868906656901042
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,16,16,128,1,float16,fp8,0,0.16859734058380127
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,16,1,128,1,float16,fp8,0,0.14865066607793173
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,16,16,128,1,float16,float16,0,0.16554666558901468
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,16,8,128,1,float16,fp8,0,0.28484266996383667
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,16,8,128,1,fp8,fp8,0,0.2821066578229268
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,16,16,128,1,fp8,fp8,0,0.16520532965660095
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,16,1,128,1,fp8,fp8,0,0.145578662554423
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,16,2,128,1,fp8,fp8,0,0.14727466305096945
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,16,2,128,1,float16,fp8,0,0.14898133277893066
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,16,4,128,1,float16,float16,0,0.1527466674645742
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,16,2,128,1,float16,float16,0,0.1525759994983673
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,16,4,128,1,fp8,fp8,0,0.15189866224924722
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,16,4,128,1,float16,fp8,0,0.15377599994341531
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,16,8,128,1,float16,float16,0,0.15889066457748413
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,16,8,128,1,float16,fp8,0,0.1609440048535665
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,16,1,128,1,float16,float16,0,0.09591466188430786
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,16,1,128,1,fp8,fp8,0,0.09147733449935913
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,16,16,128,1,float16,float16,0,0.09966933727264404
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,16,16,128,1,float16,fp8,0,0.10240000486373901
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,16,8,128,1,fp8,fp8,0,0.15923733512560526
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,16,1,128,1,float16,fp8,0,0.09489599863688152
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,16,16,128,1,fp8,fp8,0,0.10205866893132527
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,16,2,128,1,float16,float16,0,0.09590400258700053
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,16,2,128,1,float16,fp8,0,0.09523199995358785
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,16,2,128,1,fp8,fp8,0,0.09284266829490662
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,16,4,128,1,float16,float16,0,0.09659733374913533
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,16,4,128,1,float16,fp8,0,0.09830400347709656
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,16,4,128,1,fp8,fp8,0,0.09351999560991923
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,16,8,128,1,float16,float16,0,0.09659733374913533
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,16,16,128,1,float16,float16,0,0.06417599817117055
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,16,1,128,1,float16,float16,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,16,8,128,1,fp8,fp8,0,0.09488532940546672
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,16,16,128,1,float16,fp8,0,0.06689600149790446
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,16,1,128,1,float16,fp8,0,0.06380266447861989
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,16,8,128,1,float16,fp8,0,0.09967466195424397
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,16,16,128,1,fp8,fp8,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,16,1,128,1,fp8,fp8,0,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,16,2,128,1,float16,fp8,0,0.06417599817117055
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,16,2,128,1,float16,float16,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,16,2,128,1,fp8,fp8,0,0.060421332716941833
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,16,4,128,1,float16,float16,0,0.0641653339068095
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,16,8,128,1,float16,float16,0,0.06452266871929169
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,16,4,128,1,fp8,fp8,0,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,16,8,128,1,fp8,fp8,0,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,16,8,128,1,float16,fp8,0,0.06588266789913177
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,16,4,128,1,float16,fp8,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,16,1,128,1,float16,float16,0,1.3502880732218425
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,16,1,128,1,float16,fp8,0,1.3503093719482422
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,16,1,128,1,fp8,fp8,0,1.344853401184082
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,16,2,128,1,float16,float16,0,1.3550933202107747
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,16,2,128,1,float16,fp8,0,1.3714720408121746
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,16,2,128,1,fp8,fp8,0,1.3591893513997395
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,16,4,128,1,float16,float16,0,1.36738125483195
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,16,4,128,1,float16,fp8,0,1.377962589263916
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,16,1,128,1,float16,float16,0,0.687445322672526
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,16,4,128,1,fp8,fp8,0,1.3813707033793132
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,16,16,128,1,float16,float16,0,0.7557120323181152
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,16,16,128,1,float16,fp8,0,0.7597920099894205
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,16,1,128,1,float16,fp8,0,0.6884480317433676
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,16,8,128,1,float16,float16,0,1.4172159830729167
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,16,8,128,1,float16,fp8,0,1.4158506393432617
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,16,16,128,1,fp8,fp8,0,0.7693653106689453
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,16,8,128,1,fp8,fp8,0,1.4274506568908691
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,16,1,128,1,fp8,fp8,0,0.6894933382670084
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,16,2,128,1,float16,fp8,0,0.6908693313598633
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,16,2,128,1,float16,float16,0,0.6976853211720785
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,16,2,128,1,fp8,fp8,0,0.6963146527608236
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,16,4,128,1,float16,float16,0,0.702789306640625
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,16,4,128,1,float16,fp8,0,0.7031359672546387
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,16,4,128,1,fp8,fp8,0,0.7048587004343668
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,16,8,128,1,float16,float16,0,0.7167999744415283
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,16,1,128,1,float16,float16,0,0.3592533270517985
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,16,8,128,1,float16,fp8,0,0.724997361501058
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,16,1,128,1,float16,fp8,0,0.35891199111938477
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,16,16,128,1,float16,float16,0,0.3973173300425212
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,16,16,128,1,float16,fp8,0,0.398357351620992
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,16,1,128,1,fp8,fp8,0,0.3633333444595337
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,16,16,128,1,fp8,fp8,0,0.4021120071411133
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,16,8,128,1,fp8,fp8,0,0.7270452976226807
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,16,2,128,1,float16,float16,0,0.3609386682510376
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,16,2,128,1,float16,fp8,0,0.36369601885477704
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,16,2,128,1,fp8,fp8,0,0.3660800059636434
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,16,4,128,1,float16,float16,0,0.36878931522369385
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,16,4,128,1,float16,fp8,0,0.3694933255513509
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,16,4,128,1,fp8,fp8,0,0.3671146631240845
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,16,8,128,1,float16,float16,0,0.37666134039560956
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,16,16,128,1,float16,float16,0,0.21333332856496176
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,16,8,128,1,float16,fp8,0,0.37836798032124835
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,16,16,128,1,float16,fp8,0,0.2187946637471517
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,16,8,128,1,fp8,fp8,0,0.3787039915720622
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,16,1,128,1,float16,float16,0,0.19029333194096884
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,16,1,128,1,float16,fp8,0,0.19302932421366373
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,16,1,128,1,fp8,fp8,0,0.19779733816782633
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,16,2,128,1,float16,float16,0,0.19507733980814615
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,16,16,128,1,fp8,fp8,0,0.2194719910621643
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,16,2,128,1,float16,fp8,0,0.1945599913597107
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,16,2,128,1,fp8,fp8,0,0.19933867454528809
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,16,4,128,1,float16,float16,0,0.2002293268839518
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,16,4,128,1,float16,fp8,0,0.20070399840672812
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,16,4,128,1,fp8,fp8,0,0.20121600230534872
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,16,8,128,1,float16,float16,0,0.20394132534662882
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,16,16,128,1,float16,float16,0,0.12325333555539449
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,16,16,128,1,float16,fp8,0,0.12492266297340393
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,16,8,128,1,float16,fp8,0,0.20787199338277182
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,16,1,128,1,float16,fp8,0,0.1116426686445872
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,16,1,128,1,float16,float16,0,0.11240532994270325
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,16,16,128,1,fp8,fp8,0,0.12662933270136514
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,16,8,128,1,fp8,fp8,0,0.20923733711242676
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,16,1,128,1,fp8,fp8,0,0.11162133018175761
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,16,2,128,1,float16,float16,0,0.11400533715883891
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,16,2,128,1,fp8,fp8,0,0.10990933577219646
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,16,2,128,1,float16,fp8,0,0.11366400122642517
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,16,4,128,1,float16,float16,0,0.11366400122642517
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,16,4,128,1,fp8,fp8,0,0.11264000336329143
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,16,4,128,1,float16,fp8,0,0.11502933502197266
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,16,8,128,1,float16,float16,0,0.11947733163833618
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,16,1,128,1,float16,float16,0,0.07372800012429555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,16,8,128,1,float16,fp8,0,0.11773866415023804
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,16,1,128,1,float16,fp8,0,0.0744053324063619
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,16,16,128,1,float16,float16,0,0.07679999868075053
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,16,16,128,1,float16,fp8,0,0.07851199805736542
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,16,8,128,1,fp8,fp8,0,0.1204906702041626
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,16,16,128,1,fp8,fp8,0,0.07784000039100647
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,16,2,128,1,float16,float16,0,0.07372266550858815
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,16,2,128,1,float16,fp8,0,0.07406933108965556
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,16,2,128,1,fp8,fp8,0,0.07372266550858815
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,16,1,128,1,fp8,fp8,0,0.0730506678422292
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,16,4,128,1,float16,float16,0,0.07474666833877563
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,16,4,128,1,float16,fp8,0,0.0747573326031367
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,16,8,128,1,float16,fp8,0,0.07611733178297679
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,16,4,128,1,fp8,fp8,0,0.07409599920113881
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,16,8,128,1,float16,float16,0,0.07441066702206929
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,16,8,128,1,fp8,fp8,0,0.07441066702206929
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,16,16,128,1,float16,float16,0,0.053247998158137
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,16,1,128,1,float16,fp8,0,0.05357866485913595
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,16,16,128,1,fp8,fp8,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,16,1,128,1,float16,float16,0,0.05359466870625814
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,16,1,128,1,fp8,fp8,0,0.05222400029500326
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,16,16,128,1,float16,fp8,0,0.05359466870625814
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,16,2,128,1,float16,fp8,0,0.05188799897829691
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,16,2,128,1,float16,float16,0,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,16,2,128,1,fp8,fp8,0,0.049498667319615684
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,16,4,128,1,float16,float16,0,0.05359466870625814
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,16,8,128,1,float16,float16,0,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,16,4,128,1,fp8,fp8,0,0.05153599878152212
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,16,4,128,1,float16,fp8,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,16,8,128,1,fp8,fp8,0,0.05154666801293691
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,16,8,128,1,float16,fp8,0,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,16,1,128,1,float16,float16,0,1.5076692899068196
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,16,1,128,1,float16,fp8,0,1.5032159487406414
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,16,1,128,1,fp8,fp8,0,1.56876802444458
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,16,2,128,1,float16,float16,0,1.5138079325358074
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,16,2,128,1,float16,fp8,0,1.5073280334472656
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,16,2,128,1,fp8,fp8,0,1.6259466807047527
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,16,4,128,1,float16,float16,0,1.5877013206481934
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,16,4,128,1,float16,fp8,0,1.5701173146565754
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,16,1,128,1,float16,float16,0,0.7700426578521729
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,16,4,128,1,fp8,fp8,0,1.8218560218811035
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,16,16,128,1,float16,float16,0,0.8584799766540527
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,16,16,128,1,float16,fp8,0,0.8441226482391357
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,16,1,128,1,float16,fp8,0,0.7676800092061361
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,16,16,128,1,fp8,fp8,0,0.9007786909739176
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,16,8,128,1,float16,fp8,0,1.5609173774719238
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,16,8,128,1,float16,float16,0,1.5788426399230957
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,16,1,128,1,fp8,fp8,0,0.7968426545461019
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,16,8,128,1,fp8,fp8,0,1.8126133282979329
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,16,2,128,1,float16,float16,0,0.7676586310068766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,16,2,128,1,float16,fp8,0,0.7710666656494141
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,16,2,128,1,fp8,fp8,0,0.8149387041727701
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,16,4,128,1,float16,float16,0,0.7961706320444742
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,16,4,128,1,float16,fp8,0,0.7866026560465494
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,16,4,128,1,fp8,fp8,0,0.905898650487264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,16,8,128,1,float16,float16,0,0.8009386857350668
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,16,1,128,1,float16,float16,0,0.3928746779759725
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,16,8,128,1,float16,fp8,0,0.7907040119171143
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,16,16,128,1,float16,float16,0,0.43503467241923016
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,16,1,128,1,float16,fp8,0,0.39185067017873126
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,16,16,128,1,float16,fp8,0,0.42956801255544025
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,16,8,128,1,fp8,fp8,0,0.8833653132120768
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,16,1,128,1,fp8,fp8,0,0.4089173475901286
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,16,16,128,1,fp8,fp8,0,0.45448533693949383
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,16,2,128,1,float16,float16,0,0.3949226538340251
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,16,2,128,1,float16,fp8,0,0.39492801825205487
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,16,2,128,1,fp8,fp8,0,0.41761600971221924
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,16,4,128,1,float16,float16,0,0.407914678255717
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,16,4,128,1,fp8,fp8,0,0.449343999226888
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,16,4,128,1,float16,fp8,0,0.4078933397928874
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,16,16,128,1,float16,float16,0,0.2303946614265442
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,16,16,128,1,float16,fp8,0,0.22458666563034058
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,16,8,128,1,float16,float16,0,0.4113066593805949
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,16,1,128,1,float16,float16,0,0.20957867304484049
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,16,8,128,1,float16,fp8,0,0.4068746566772461
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,16,8,128,1,fp8,fp8,0,0.4456106821695964
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,16,1,128,1,float16,fp8,0,0.20957867304484049
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,16,16,128,1,fp8,fp8,0,0.23790399233500162
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,16,2,128,1,float16,fp8,0,0.2082186738650004
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,16,1,128,1,fp8,fp8,0,0.21434666713078818
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,16,2,128,1,float16,float16,0,0.21195199092229208
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,16,2,128,1,fp8,fp8,0,0.21811199188232422
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,16,4,128,1,float16,float16,0,0.21845332781473795
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,16,4,128,1,float16,fp8,0,0.21504000822703043
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,16,4,128,1,fp8,fp8,0,0.2362026572227478
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,16,8,128,1,float16,float16,0,0.2201546629269918
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,16,1,128,1,float16,float16,0,0.11366400122642517
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,16,16,128,1,float16,float16,0,0.1269653340180715
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,16,8,128,1,float16,fp8,0,0.21504000822703043
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,16,16,128,1,fp8,fp8,0,0.13126933574676514
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,16,16,128,1,float16,fp8,0,0.1252959966659546
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,16,8,128,1,fp8,fp8,0,0.23073599735895792
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,16,1,128,1,fp8,fp8,0,0.115365336338679
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,16,2,128,1,float16,float16,0,0.11434666315714519
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,16,1,128,1,float16,fp8,0,0.11365333199501038
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,16,2,128,1,fp8,fp8,0,0.11741866668065389
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,16,2,128,1,float16,fp8,0,0.11366400122642517
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,16,4,128,1,float16,float16,0,0.11776533722877502
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,16,4,128,1,float16,fp8,0,0.11569066842397054
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,16,4,128,1,fp8,fp8,0,0.12424533565839131
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,16,8,128,1,float16,float16,0,0.12185600399971008
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,16,8,128,1,float16,fp8,0,0.11947733163833618
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,16,1,128,1,float16,float16,0,0.06586666901906331
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,16,1,128,1,float16,fp8,0,0.06861333549022675
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,16,16,128,1,float16,float16,0,0.07236266632874806
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,16,1,128,1,fp8,fp8,0,0.0682666649421056
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,16,16,128,1,float16,fp8,0,0.07236266632874806
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,16,16,128,1,fp8,fp8,0,0.07407466570536296
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,16,8,128,1,fp8,fp8,0,0.12458667159080505
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,16,2,128,1,float16,float16,0,0.06724266707897186
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,16,2,128,1,float16,fp8,0,0.06794666747252147
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,16,2,128,1,fp8,fp8,0,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,16,4,128,1,float16,fp8,0,0.0682666649421056
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,16,4,128,1,float16,float16,0,0.06962666908899943
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,16,8,128,1,float16,float16,0,0.06929600238800049
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,16,4,128,1,fp8,fp8,0,0.07031466563542683
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,16,8,128,1,fp8,fp8,0,0.07028799752394359
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,16,1,128,1,float16,float16,0,0.041306667029857635
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,16,16,128,1,float16,float16,0,0.043375998735427856
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,16,1,128,1,float16,fp8,0,0.04027733455101649
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,16,16,128,1,fp8,fp8,0,0.043354665239652
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,16,16,128,1,float16,fp8,0,0.04334400097529093
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,16,8,128,1,float16,fp8,0,0.07066133121649425
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,16,1,128,1,fp8,fp8,0,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,16,2,128,1,float16,fp8,0,0.04164800047874451
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,16,4,128,1,float16,float16,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,16,2,128,1,float16,float16,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,16,4,128,1,float16,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,16,2,128,1,fp8,fp8,0,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,16,4,128,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,16,8,128,1,float16,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,16,16,128,1,float16,float16,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,16,8,128,1,fp8,fp8,0,0.041306667029857635
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,16,16,128,1,fp8,fp8,0,0.03448000053564707
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,16,1,128,1,float16,fp8,0,0.03344533344109853
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,16,1,128,1,float16,float16,0,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,16,8,128,1,float16,float16,0,0.043354665239652
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,16,16,128,1,float16,fp8,0,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,16,1,128,1,fp8,fp8,0,0.031397332747777305
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,16,2,128,1,float16,float16,0,0.034474665919939675
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,16,2,128,1,fp8,fp8,0,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,16,2,128,1,float16,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,16,4,128,1,float16,float16,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,16,8,128,1,float16,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,16,4,128,1,float16,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,16,8,128,1,float16,float16,0,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,16,8,128,1,fp8,fp8,0,0.03379733363787333
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,16,4,128,1,fp8,fp8,0,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,16,1,128,1,float16,float16,0,1.316864013671875
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,16,1,128,1,float16,fp8,0,1.3081706364949544
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,16,1,128,1,fp8,fp8,0,1.4073012669881184
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,16,2,128,1,float16,float16,0,1.3335893948872883
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,16,2,128,1,float16,fp8,0,1.3117760022481282
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,16,2,128,1,fp8,fp8,0,1.4366772969563801
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,16,4,128,1,float16,float16,0,1.392639954884847
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,16,4,128,1,float16,fp8,0,1.3666879336039226
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,16,1,128,1,float16,float16,0,0.667306661605835
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,16,4,128,1,fp8,fp8,0,1.6587093671162922
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,16,16,128,1,float16,fp8,0,0.7454720338185629
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,16,16,128,1,float16,float16,0,0.7591626644134521
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,16,16,128,1,fp8,fp8,0,0.8197120030721029
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,16,1,128,1,float16,fp8,0,0.6645813385645548
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,16,8,128,1,float16,fp8,0,1.3721599578857422
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,16,8,128,1,float16,float16,0,1.384485403696696
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,16,8,128,1,fp8,fp8,0,1.6603946685791016
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,16,1,128,1,fp8,fp8,0,0.7150932947794596
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,16,2,128,1,float16,float16,0,0.6734506289164225
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,16,2,128,1,float16,fp8,0,0.6686720053354899
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,16,2,128,1,fp8,fp8,0,0.7372746467590332
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,16,4,128,1,float16,fp8,0,0.6898346741994222
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,16,4,128,1,fp8,fp8,0,0.8248319625854492
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,16,4,128,1,float16,float16,0,0.6976799964904785
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,16,8,128,1,float16,fp8,0,0.6929120222727457
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,16,8,128,1,float16,float16,0,0.7031520207722982
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,16,1,128,1,float16,float16,0,0.342522660891215
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,16,16,128,1,float16,float16,0,0.3848533233006795
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,16,1,128,1,float16,fp8,0,0.3452586730321248
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,16,16,128,1,float16,fp8,0,0.3780213197072347
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,16,8,128,1,fp8,fp8,0,0.816981315612793
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,16,1,128,1,fp8,fp8,0,0.36881065368652344
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,16,2,128,1,float16,float16,0,0.3493493398030599
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,16,2,128,1,float16,fp8,0,0.3452639977137248
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,16,16,128,1,fp8,fp8,0,0.41096532344818115
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,16,2,128,1,fp8,fp8,0,0.37461864948272705
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,16,4,128,1,float16,fp8,0,0.35788265864054364
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,16,4,128,1,float16,float16,0,0.35993067423502606
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,16,4,128,1,fp8,fp8,0,0.40858133633931476
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,16,16,128,1,float16,float16,0,0.20325867335001627
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,16,8,128,1,float16,float16,0,0.36471466223398846
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,16,16,128,1,float16,fp8,0,0.19985065857569376
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,16,1,128,1,float16,float16,0,0.18397865692774454
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,16,1,128,1,float16,fp8,0,0.18175999323527017
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,16,8,128,1,float16,fp8,0,0.36027733484903973
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,16,8,128,1,fp8,fp8,0,0.4027786652247111
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,16,16,128,1,fp8,fp8,0,0.21677333116531372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,16,1,128,1,fp8,fp8,0,0.19421867529551187
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,16,2,128,1,float16,float16,0,0.1853440006573995
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,16,2,128,1,float16,fp8,0,0.183296004931132
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,16,4,128,1,float16,float16,0,0.19131733973821005
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,16,4,128,1,float16,fp8,0,0.19165867567062378
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,16,4,128,1,fp8,fp8,0,0.21126399437586466
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,16,2,128,1,fp8,fp8,0,0.19848533471425375
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,16,8,128,1,float16,float16,0,0.19353600343068442
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,16,1,128,1,float16,float16,0,0.10068800052007039
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,16,1,128,1,fp8,fp8,0,0.10478400190671285
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,16,16,128,1,float16,float16,0,0.11331733067830403
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,16,8,128,1,fp8,fp8,0,0.2092640002568563
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,16,8,128,1,float16,fp8,0,0.19216533501942953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,16,1,128,1,float16,fp8,0,0.0993386705716451
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,16,16,128,1,float16,fp8,0,0.11264000336329143
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,16,16,128,1,fp8,fp8,0,0.11946666240692139
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,16,2,128,1,float16,float16,0,0.10035199920336406
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,16,2,128,1,float16,fp8,0,0.09966933727264404
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,16,2,128,1,fp8,fp8,0,0.10513066252072652
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,16,4,128,1,float16,float16,0,0.10410666465759277
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,16,4,128,1,float16,fp8,0,0.10238933563232422
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,16,8,128,1,float16,float16,0,0.10787199934323628
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,16,8,128,1,float16,fp8,0,0.10514133175214131
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,16,4,128,1,fp8,fp8,0,0.11332266529401143
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,16,8,128,1,fp8,fp8,0,0.11434132854143779
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,16,16,128,1,float16,float16,0,0.06178666651248932
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,16,1,128,1,float16,float16,0,0.056661332647005715
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,16,16,128,1,fp8,fp8,0,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,16,16,128,1,float16,fp8,0,0.06109866499900818
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,16,1,128,1,float16,fp8,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,16,1,128,1,fp8,fp8,0,0.05836800237496694
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,16,2,128,1,fp8,fp8,0,0.06006933252016703
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,16,2,128,1,float16,fp8,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,16,4,128,1,float16,fp8,0,0.05770133435726166
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,16,2,128,1,float16,float16,0,0.057002668579419456
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,16,4,128,1,float16,float16,0,0.05973866581916809
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,16,4,128,1,fp8,fp8,0,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,16,8,128,1,float16,float16,0,0.05940799911816915
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,16,8,128,1,float16,fp8,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,16,16,128,1,float16,float16,0,0.03719466676314672
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,16,16,128,1,float16,fp8,0,0.03754133234421412
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,16,8,128,1,fp8,fp8,0,0.0628053347269694
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,16,1,128,1,float16,float16,0,0.035504000882307686
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,16,1,128,1,float16,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,16,1,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,16,2,128,1,float16,float16,0,0.03617066641648611
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,16,2,128,1,fp8,fp8,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,16,2,128,1,float16,fp8,0,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,16,4,128,1,float16,float16,0,0.03721600025892258
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,16,16,128,1,fp8,fp8,0,0.04029333343108495
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,16,4,128,1,float16,fp8,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,16,4,128,1,fp8,fp8,0,0.03722133239110311
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,16,8,128,1,float16,float16,0,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,16,8,128,1,float16,fp8,0,0.036864000062147774
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,16,8,128,1,fp8,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,16,16,128,1,float16,float16,0,0.030031998952229817
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,16,16,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,16,1,128,1,float16,float16,0,0.02903999884923299
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,16,1,128,1,fp8,fp8,0,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,16,1,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,16,2,128,1,float16,float16,0,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,16,2,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,16,2,128,1,fp8,fp8,0,0.02902399996916453
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,16,4,128,1,float16,float16,0,0.028330666323502857
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,16,16,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,16,8,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,16,4,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,16,8,128,1,float16,fp8,0,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,16,4,128,1,float16,fp8,0,0.02903466671705246
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,16,8,128,1,float16,float16,0,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,16,1,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,16,1,128,1,float16,fp8,0,0.025610665480295818
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,16,1,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,16,16,128,1,float16,fp8,0,0.02493866781393687
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,16,2,128,1,float16,float16,0,0.02492800106604894
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,16,2,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,16,16,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,16,4,128,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,16,4,128,1,float16,float16,0,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,16,16,128,1,float16,float16,0,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,16,2,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,16,4,128,1,fp8,fp8,0,0.023893333971500397
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,16,8,128,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,16,8,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,16,8,128,1,fp8,fp8,0,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,16,1,128,1,float16,float16,0,0.5108000040054321
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,16,1,128,1,float16,fp8,0,0.49957334995269775
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,16,2,128,1,float16,fp8,0,0.5060319900512695
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,16,2,128,1,float16,float16,0,0.5077333450317383
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,16,1,128,1,fp8,fp8,0,0.5304586489995321
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,16,2,128,1,fp8,fp8,0,0.5471466779708862
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,16,4,128,1,float16,float16,0,0.5300853252410889
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,16,4,128,1,float16,fp8,0,0.5249706506729126
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,16,4,128,1,fp8,fp8,0,0.6439253489176432
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,16,1,128,1,float16,float16,0,0.26367467641830444
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,16,16,128,1,float16,fp8,0,0.2964479923248291
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,16,16,128,1,float16,float16,0,0.3012213309605916
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,16,16,128,1,fp8,fp8,0,0.3199946681658427
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,16,8,128,1,float16,float16,0,0.537610650062561
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,16,8,128,1,float16,fp8,0,0.529749313990275
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,16,8,128,1,fp8,fp8,0,0.6232693195343018
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,16,1,128,1,float16,fp8,0,0.2653866608937581
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,16,1,128,1,fp8,fp8,0,0.2776693304379781
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,16,2,128,1,float16,float16,0,0.2681173284848531
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,16,2,128,1,float16,fp8,0,0.26813334226608276
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,16,2,128,1,fp8,fp8,0,0.285861333211263
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,16,4,128,1,float16,fp8,0,0.274944007396698
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,16,4,128,1,float16,float16,0,0.279039998849233
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,16,4,128,1,fp8,fp8,0,0.3199999928474426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,16,8,128,1,float16,float16,0,0.2855253418286641
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,16,16,128,1,float16,float16,0,0.1665493349234263
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,16,1,128,1,float16,float16,0,0.14317867159843445
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,16,1,128,1,float16,fp8,0,0.14130666851997375
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,16,8,128,1,fp8,fp8,0,0.31385066111882526
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,16,16,128,1,float16,fp8,0,0.16315199931462607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,16,8,128,1,float16,fp8,0,0.27665066719055176
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,16,1,128,1,fp8,fp8,0,0.15035733580589294
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,16,16,128,1,fp8,fp8,0,0.1718613306681315
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,16,2,128,1,float16,float16,0,0.14728533228238425
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,16,2,128,1,float16,fp8,0,0.14592533310254416
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,16,2,128,1,fp8,fp8,0,0.15377066532770792
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,16,4,128,1,float16,float16,0,0.1525813341140747
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,16,4,128,1,float16,fp8,0,0.1527466674645742
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,16,4,128,1,fp8,fp8,0,0.16607466340065002
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,16,1,128,1,float16,float16,0,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,16,8,128,1,float16,float16,0,0.15547733505566916
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,16,8,128,1,fp8,fp8,0,0.16366933782895407
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,16,8,128,1,float16,fp8,0,0.1513813336690267
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,16,16,128,1,float16,float16,0,0.09659733374913533
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,16,16,128,1,fp8,fp8,0,0.09727467099825542
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,16,16,128,1,float16,fp8,0,0.09318400422732036
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,16,1,128,1,float16,fp8,0,0.07850666840871175
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,16,1,128,1,fp8,fp8,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,16,2,128,1,float16,float16,0,0.08089066545168559
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,16,2,128,1,float16,fp8,0,0.07918933530648549
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,16,2,128,1,fp8,fp8,0,0.08396800359090169
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,16,4,128,1,float16,fp8,0,0.08260266482830048
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,16,4,128,1,float16,float16,0,0.08430932958920796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,16,4,128,1,fp8,fp8,0,0.09079466263453166
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,16,1,128,1,float16,float16,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,16,1,128,1,float16,fp8,0,0.04574400186538696
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,16,8,128,1,float16,fp8,0,0.08601599931716919
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,16,16,128,1,float16,float16,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,16,8,128,1,float16,float16,0,0.08430932958920796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,16,16,128,1,fp8,fp8,0,0.05426666637261709
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,16,8,128,1,fp8,fp8,0,0.09114133318265279
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,16,16,128,1,float16,fp8,0,0.05018133421738943
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,16,1,128,1,fp8,fp8,0,0.046762665112813316
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,16,2,128,1,float16,float16,0,0.047450666626294456
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,16,2,128,1,fp8,fp8,0,0.047450666626294456
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,16,2,128,1,float16,fp8,0,0.047781333327293396
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,16,8,128,1,float16,float16,0,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,16,4,128,1,fp8,fp8,0,0.04948799808820089
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,16,4,128,1,float16,float16,0,0.048122664292653404
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,16,16,128,1,float16,float16,0,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,16,4,128,1,float16,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,16,8,128,1,float16,fp8,0,0.04915200173854828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,16,1,128,1,float16,float16,0,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,16,8,128,1,fp8,fp8,0,0.049813335140546165
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,16,16,128,1,float16,fp8,0,0.03275733441114426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,16,1,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,16,16,128,1,fp8,fp8,0,0.03344533344109853
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,16,2,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,16,4,128,1,fp8,fp8,0,0.03307733436425527
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,16,2,128,1,float16,fp8,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,16,1,128,1,float16,fp8,0,0.03142400085926056
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,16,8,128,1,float16,float16,0,0.03138133386770884
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,16,2,128,1,float16,float16,0,0.03243733445803324
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,16,4,128,1,float16,float16,0,0.03108799954255422
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,16,4,128,1,float16,fp8,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,16,8,128,1,float16,fp8,0,0.03309866786003113
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,16,8,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,16,16,128,1,fp8,fp8,0,0.024906667570273083
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,16,16,128,1,float16,float16,0,0.026954665780067444
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,16,1,128,1,float16,float16,0,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,16,2,128,1,float16,float16,0,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,16,1,128,1,float16,fp8,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,16,1,128,1,fp8,fp8,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,16,2,128,1,float16,fp8,0,0.024901332954565685
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,16,2,128,1,fp8,fp8,0,0.023887999355793
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,16,16,128,1,float16,fp8,0,0.025253333151340485
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,16,4,128,1,float16,float16,0,0.02526933451493581
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,16,4,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,16,4,128,1,float16,fp8,0,0.025941332181294758
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,16,8,128,1,float16,float16,0,0.025600001215934753
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,16,16,128,1,float16,float16,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,16,8,128,1,fp8,fp8,0,0.02489600082238515
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,16,8,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,16,1,128,1,float16,float16,0,0.021157334248224895
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,16,1,128,1,float16,fp8,0,0.020810666183630627
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,16,16,128,1,fp8,fp8,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,16,2,128,1,float16,float16,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,16,2,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,16,1,128,1,fp8,fp8,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,16,16,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,16,2,128,1,fp8,fp8,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,16,4,128,1,float16,float16,0,0.020831999679406483
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,16,4,128,1,float16,fp8,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,16,8,128,1,float16,float16,0,0.022533332308133442
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,16,4,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,16,8,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,16,8,128,1,fp8,fp8,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,16,16,128,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,16,16,128,1,float16,fp8,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,16,1,128,1,float16,float16,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,16,1,128,1,float16,fp8,0,0.020848001043001812
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,16,2,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,16,1,128,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,16,2,128,1,float16,fp8,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,16,2,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,16,16,128,1,float16,float16,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,16,4,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,16,4,128,1,fp8,fp8,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,16,4,128,1,float16,float16,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,16,8,128,1,float16,float16,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,16,8,128,1,float16,fp8,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,16,8,128,1,fp8,fp8,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,16,1,128,1,float16,float16,0,0.2786986629168193
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,16,1,128,1,fp8,fp8,0,0.2851840058962504
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,16,2,128,1,fp8,fp8,0,0.2943999965985616
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,16,2,128,1,float16,fp8,0,0.28142933050791424
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,16,2,128,1,float16,float16,0,0.28278932968775433
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,16,1,128,1,float16,fp8,0,0.2739040056864421
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,16,4,128,1,float16,float16,0,0.29505600531895954
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,16,4,128,1,float16,fp8,0,0.29063467184702557
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,16,1,128,1,float16,float16,0,0.14967466394106546
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,16,4,128,1,fp8,fp8,0,0.3298986752827962
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,16,16,128,1,float16,float16,0,0.1706719994544983
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,16,8,128,1,float16,float16,0,0.29576534032821655
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,16,16,128,1,fp8,fp8,0,0.17698132991790771
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,16,16,128,1,float16,fp8,0,0.16538133223851523
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,16,8,128,1,float16,fp8,0,0.2913279930750529
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,16,1,128,1,float16,fp8,0,0.1469439963499705
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,16,8,128,1,fp8,fp8,0,0.32205333312352497
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,16,1,128,1,fp8,fp8,0,0.15495999654134116
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,16,2,128,1,float16,fp8,0,0.14967466394106546
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,16,2,128,1,float16,float16,0,0.1539306640625
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,16,2,128,1,fp8,fp8,0,0.1585493286450704
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,16,4,128,1,float16,fp8,0,0.1585493286450704
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,16,4,128,1,fp8,fp8,0,0.16776533921559653
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,16,4,128,1,float16,float16,0,0.1585546632607778
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,16,8,128,1,float16,float16,0,0.15718400478363037
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,16,8,128,1,float16,fp8,0,0.15803200006484985
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,16,1,128,1,float16,float16,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,16,16,128,1,float16,float16,0,0.09659199913342793
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,16,16,128,1,float16,fp8,0,0.09454933802286784
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,16,16,128,1,fp8,fp8,0,0.0993333359559377
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,16,8,128,1,fp8,fp8,0,0.16980799039204916
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,16,1,128,1,fp8,fp8,0,0.08260266482830048
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,16,1,128,1,float16,fp8,0,0.08055999875068665
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,16,2,128,1,float16,float16,0,0.0825973351796468
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,16,2,128,1,float16,fp8,0,0.08089600006739299
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,16,2,128,1,fp8,fp8,0,0.0846506655216217
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,16,4,128,1,float16,float16,0,0.0846506655216217
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,16,4,128,1,float16,fp8,0,0.0846560001373291
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,16,4,128,1,fp8,fp8,0,0.09318932890892029
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,16,8,128,1,float16,float16,0,0.08635733524958293
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,16,8,128,1,fp8,fp8,0,0.09454933802286784
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,16,1,128,1,float16,float16,0,0.047781333327293396
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,16,1,128,1,float16,fp8,0,0.047781333327293396
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,16,16,128,1,float16,float16,0,0.050517335534095764
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,16,8,128,1,float16,fp8,0,0.08601599931716919
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,16,16,128,1,float16,fp8,0,0.05118933320045471
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,16,1,128,1,fp8,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,16,16,128,1,fp8,fp8,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,16,2,128,1,float16,float16,0,0.049135997891426086
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,16,2,128,1,float16,fp8,0,0.04947733382383982
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,16,2,128,1,fp8,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,16,4,128,1,float16,float16,0,0.04948266843954722
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,16,4,128,1,fp8,fp8,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,16,8,128,1,float16,float16,0,0.049829334020614624
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,16,4,128,1,float16,fp8,0,0.04948799808820089
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,16,8,128,1,float16,fp8,0,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,16,16,128,1,float16,float16,0,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,16,16,128,1,float16,fp8,0,0.03344533344109853
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,16,16,128,1,fp8,fp8,0,0.03379199902216593
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,16,8,128,1,fp8,fp8,0,0.05256533126036326
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,16,1,128,1,float16,float16,0,0.03209066639343897
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,16,2,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,16,2,128,1,fp8,fp8,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,16,1,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,16,4,128,1,float16,float16,0,0.03242666771014532
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,16,4,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,16,2,128,1,float16,float16,0,0.03242666771014532
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,16,4,128,1,fp8,fp8,0,0.034474665919939675
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,16,1,128,1,fp8,fp8,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,16,8,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,16,8,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,16,16,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,16,16,128,1,float16,fp8,0,0.022848000129063923
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,16,8,128,1,fp8,fp8,0,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,16,16,128,1,fp8,fp8,0,0.023898666103680927
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,16,1,128,1,fp8,fp8,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,16,2,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,16,2,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,16,2,128,1,fp8,fp8,0,0.021151999632517498
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,16,1,128,1,float16,float16,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,16,4,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,16,8,128,1,float16,float16,0,0.022533332308133442
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,16,4,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,16,1,128,1,float16,fp8,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,16,4,128,1,float16,fp8,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,16,8,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,16,8,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,16,16,128,1,float16,fp8,0,0.019109333554903667
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,16,16,128,1,fp8,fp8,0,0.019109333554903667
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,16,1,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,16,2,128,1,float16,float16,0,0.01809599995613098
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,16,1,128,1,fp8,fp8,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,16,2,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,16,2,128,1,fp8,fp8,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,16,4,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,16,16,128,1,float16,float16,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,16,1,128,1,float16,fp8,0,0.01876266673207283
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,16,4,128,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,16,8,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,16,8,128,1,float16,fp8,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,16,16,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,16,4,128,1,float16,fp8,0,0.019120000302791595
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,16,8,128,1,float16,float16,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,16,1,128,1,float16,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,16,1,128,1,float16,float16,0,0.017429333180189133
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,16,2,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,16,1,128,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,16,16,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,16,2,128,1,float16,fp8,0,0.018789333601792652
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,16,2,128,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,16,4,128,1,float16,float16,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,16,4,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,16,4,128,1,float16,fp8,0,0.018800000349680584
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,16,16,128,1,fp8,fp8,0,0.017749333133300144
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,16,8,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,16,8,128,1,float16,fp8,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,16,8,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,16,16,128,1,float16,float16,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,16,16,128,1,float16,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,16,16,128,1,fp8,fp8,0,0.01740266631046931
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,16,1,128,1,float16,float16,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,16,1,128,1,fp8,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,16,2,128,1,float16,float16,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,16,2,128,1,float16,fp8,0,0.01669866715868314
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,16,2,128,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,16,4,128,1,float16,float16,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,16,4,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,16,4,128,1,fp8,fp8,0,0.016704000532627106
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,16,8,128,1,float16,float16,0,0.01670933390657107
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,16,1,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,16,8,128,1,float16,fp8,0,0.018085333208243053
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,16,8,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,16,1,128,1,float16,fp8,0,0.18876266479492188
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,16,1,128,1,float16,float16,0,0.19062934319178262
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,16,1,128,1,fp8,fp8,0,0.2063360015551249
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,16,2,128,1,float16,fp8,0,0.19098132848739624
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,16,2,128,1,float16,float16,0,0.1914880077044169
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,16,4,128,1,float16,float16,0,0.20173867543538412
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,16,4,128,1,float16,fp8,0,0.1971199909845988
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,16,2,128,1,fp8,fp8,0,0.20957332849502563
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,16,4,128,1,fp8,fp8,0,0.22220800320307413
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,16,8,128,1,float16,float16,0,0.20206934213638306
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,16,8,128,1,float16,fp8,0,0.19900800784428915
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,16,1,128,1,float16,float16,0,0.10306666294733684
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,16,16,128,1,float16,fp8,0,0.11401066184043884
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,16,16,128,1,float16,float16,0,0.11539199948310852
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,16,16,128,1,fp8,fp8,0,0.12526933352152506
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,16,8,128,1,fp8,fp8,0,0.2242400050163269
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,16,1,128,1,float16,fp8,0,0.10240000486373901
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,16,2,128,1,float16,float16,0,0.10274133086204529
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,16,1,128,1,fp8,fp8,0,0.10920000076293945
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,16,2,128,1,fp8,fp8,0,0.11055466532707214
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,16,4,128,1,float16,float16,0,0.1088853379090627
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,16,4,128,1,float16,fp8,0,0.10752000411351521
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,16,2,128,1,float16,fp8,0,0.10307733217875163
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,16,4,128,1,fp8,fp8,0,0.11980266372362773
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,16,8,128,1,float16,float16,0,0.1088853379090627
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,16,1,128,1,float16,fp8,0,0.058362667759259544
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,16,8,128,1,float16,fp8,0,0.10751466949780782
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,16,1,128,1,fp8,fp8,0,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,16,8,128,1,fp8,fp8,0,0.1204906702041626
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,16,16,128,1,float16,float16,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,16,16,128,1,float16,fp8,0,0.06314133107662201
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,16,16,128,1,fp8,fp8,0,0.07031466563542683
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,16,1,128,1,float16,float16,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,16,2,128,1,float16,float16,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,16,2,128,1,fp8,fp8,0,0.06246933341026306
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,16,2,128,1,float16,fp8,0,0.058703998724619545
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,16,4,128,1,float16,fp8,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,16,4,128,1,float16,float16,0,0.06144000093142191
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,16,4,128,1,fp8,fp8,0,0.06621866424878438
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,16,8,128,1,float16,float16,0,0.061434666315714516
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,16,16,128,1,float16,float16,0,0.039247999588648476
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,16,16,128,1,float16,fp8,0,0.03959999978542328
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,16,16,128,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,16,1,128,1,float16,float16,0,0.038912000755469
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,16,8,128,1,fp8,fp8,0,0.06588266789913177
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,16,8,128,1,float16,fp8,0,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,16,1,128,1,float16,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,16,1,128,1,fp8,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,16,2,128,1,float16,float16,0,0.03925866633653641
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,16,2,128,1,float16,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,16,2,128,1,fp8,fp8,0,0.039247999588648476
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,16,4,128,1,float16,float16,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,16,8,128,1,float16,float16,0,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,16,4,128,1,float16,fp8,0,0.03925866633653641
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,16,8,128,1,float16,fp8,0,0.039247999588648476
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,16,8,128,1,fp8,fp8,0,0.04164266586303711
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,16,16,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,16,16,128,1,float16,fp8,0,0.0266239990790685
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,16,4,128,1,fp8,fp8,0,0.04131733377774557
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,16,1,128,1,float16,float16,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,16,16,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,16,1,128,1,float16,fp8,0,0.026629333694775898
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,16,1,128,1,fp8,fp8,0,0.026975999275843304
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,16,2,128,1,float16,float16,0,0.025263999899228413
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,16,2,128,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,16,2,128,1,fp8,fp8,0,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,16,4,128,1,float16,fp8,0,0.025231999655564625
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,16,4,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,16,8,128,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,16,8,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,16,16,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,16,16,128,1,float16,fp8,0,0.019109333554903667
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,16,8,128,1,fp8,fp8,0,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,16,4,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,16,16,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,16,1,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,16,1,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,16,2,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,16,2,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,16,1,128,1,fp8,fp8,0,0.019098666807015736
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,16,2,128,1,fp8,fp8,0,0.019082666685183842
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,16,4,128,1,float16,fp8,0,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,16,4,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,16,8,128,1,float16,fp8,0,0.019472000499566395
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,16,8,128,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,16,8,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,16,16,128,1,float16,fp8,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,16,16,128,1,float16,float16,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,16,4,128,1,float16,float16,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,16,1,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,16,1,128,1,fp8,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,16,1,128,1,float16,fp8,0,0.017125333348910015
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,16,2,128,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,16,16,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,16,2,128,1,float16,float16,0,0.01740266631046931
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,16,2,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,16,4,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,16,4,128,1,float16,fp8,0,0.016693333784739178
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,16,8,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,16,8,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,16,16,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,16,8,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,16,16,128,1,float16,fp8,0,0.01669866715868314
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,16,16,128,1,fp8,fp8,0,0.016714667280515034
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,16,1,128,1,float16,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,16,1,128,1,float16,float16,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,16,1,128,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,16,4,128,1,float16,float16,0,0.01664000004529953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,16,2,128,1,float16,float16,0,0.016704000532627106
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,16,2,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,16,2,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,16,4,128,1,float16,float16,0,0.015365333606799444
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,16,4,128,1,float16,fp8,0,0.016704000532627106
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,16,8,128,1,float16,float16,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,16,4,128,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,16,8,128,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,16,16,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,16,16,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,16,16,128,1,fp8,fp8,0,0.01669866715868314
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,16,1,128,1,float16,float16,0,0.01670933390657107
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,16,1,128,1,float16,fp8,0,0.016735999534527462
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,16,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,16,2,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,16,8,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,16,2,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,16,4,128,1,float16,float16,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,16,4,128,1,float16,fp8,0,0.016735999534527462
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,16,4,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,16,2,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,16,8,128,1,float16,fp8,0,0.017743999759356182
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,16,8,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,16,8,128,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,16,1,128,1,float16,float16,0,0.15121066570281982
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,16,1,128,1,float16,fp8,0,0.15002133448918661
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,16,1,128,1,fp8,fp8,0,0.16638400157292685
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,16,2,128,1,float16,float16,0,0.15205867091814676
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,16,2,128,1,float16,fp8,0,0.15121066570281982
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,16,2,128,1,fp8,fp8,0,0.16725865999857584
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,16,4,128,1,float16,float16,0,0.15633066495259604
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,16,4,128,1,fp8,fp8,0,0.17578667402267456
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,16,8,128,1,float16,float16,0,0.15598400433858237
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,16,8,128,1,float16,fp8,0,0.15615999698638916
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,16,8,128,1,fp8,fp8,0,0.17577066024144491
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,16,16,128,1,float16,float16,0,0.09079466263453166
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,16,16,128,1,float16,fp8,0,0.08673066894213359
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,16,4,128,1,float16,fp8,0,0.15582399566968283
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,16,16,128,1,fp8,fp8,0,0.09830400347709656
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,16,1,128,1,float16,float16,0,0.08329600095748901
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,16,1,128,1,float16,fp8,0,0.08430932958920796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,16,1,128,1,fp8,fp8,0,0.09044800202051799
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,16,2,128,1,float16,float16,0,0.08396800359090169
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,16,2,128,1,fp8,fp8,0,0.0897706647713979
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,16,2,128,1,float16,fp8,0,0.08430932958920796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,16,4,128,1,float16,fp8,0,0.0846560001373291
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,16,8,128,1,float16,float16,0,0.0867039958635966
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,16,4,128,1,float16,float16,0,0.0867039958635966
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,16,4,128,1,fp8,fp8,0,0.09354133407274882
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,16,8,128,1,float16,fp8,0,0.08431466420491536
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,16,8,128,1,fp8,fp8,0,0.09318932890892029
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,16,16,128,1,float16,float16,0,0.05222400029500326
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,16,16,128,1,fp8,fp8,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,16,1,128,1,float16,float16,0,0.051551997661590576
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,16,16,128,1,float16,fp8,0,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,16,1,128,1,fp8,fp8,0,0.054272000988324486
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,16,2,128,1,float16,float16,0,0.049813335140546165
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,16,2,128,1,float16,fp8,0,0.05017599960168203
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,16,1,128,1,float16,fp8,0,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,16,2,128,1,fp8,fp8,0,0.054618666569391884
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,16,4,128,1,float16,fp8,0,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,16,4,128,1,float16,float16,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,16,8,128,1,float16,float16,0,0.05154666801293691
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,16,8,128,1,float16,fp8,0,0.051551997661590576
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,16,4,128,1,fp8,fp8,0,0.05461333195368449
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,16,8,128,1,fp8,fp8,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,16,16,128,1,fp8,fp8,0,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,16,1,128,1,float16,fp8,0,0.032085334261258446
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,16,1,128,1,fp8,fp8,0,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,16,1,128,1,float16,float16,0,0.03140799949566523
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,16,16,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,16,2,128,1,float16,float16,0,0.031397332747777305
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,16,2,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,16,16,128,1,float16,float16,0,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,16,2,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,16,4,128,1,float16,float16,0,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,16,4,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,16,4,128,1,fp8,fp8,0,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,16,8,128,1,float16,fp8,0,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,16,8,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,16,8,128,1,float16,float16,0,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,16,1,128,1,float16,float16,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,16,16,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,16,16,128,1,fp8,fp8,0,0.02457600086927414
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,16,1,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,16,1,128,1,fp8,fp8,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,16,2,128,1,float16,float16,0,0.021157334248224895
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,16,2,128,1,float16,fp8,0,0.022853332261244457
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,16,2,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,16,4,128,1,float16,float16,0,0.022885332504908245
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,16,16,128,1,float16,float16,0,0.023925334215164185
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,16,4,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,16,4,128,1,fp8,fp8,0,0.022858666876951855
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,16,8,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,16,8,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,16,16,128,1,float16,float16,0,0.01674666628241539
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,16,16,128,1,float16,fp8,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,16,8,128,1,fp8,fp8,0,0.022853332261244457
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,16,1,128,1,float16,float16,0,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,16,1,128,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,16,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,16,2,128,1,float16,float16,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,16,2,128,1,float16,fp8,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,16,2,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,16,4,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,16,4,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,16,16,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,16,8,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,16,4,128,1,float16,float16,0,0.017407999684413273
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,16,8,128,1,float16,fp8,0,0.01670933390657107
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,16,8,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,16,16,128,1,float16,float16,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,16,16,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,16,16,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,16,1,128,1,float16,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,16,2,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,16,2,128,1,float16,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,16,1,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,16,2,128,1,fp8,fp8,0,0.016735999534527462
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,16,4,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,16,4,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,16,4,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,16,8,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,16,8,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,16,1,128,1,float16,float16,0,0.015087999403476715
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,16,16,128,1,float16,float16,0,0.015370666980743408
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,16,8,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,16,16,128,1,float16,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,16,1,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,16,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,16,2,128,1,float16,fp8,0,0.016613333175579708
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,16,1,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,16,16,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,16,2,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,16,4,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,16,4,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,16,4,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,16,8,128,1,float16,float16,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,16,8,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,16,8,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,16,2,128,1,float16,float16,0,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,16,16,128,1,float16,float16,0,0.015365333606799444
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,16,16,128,1,fp8,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,16,1,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,16,1,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,16,16,128,1,float16,fp8,0,0.01670933390657107
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,16,1,128,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,16,2,128,1,float16,float16,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,16,2,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,16,4,128,1,float16,float16,0,0.015706667055686314
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,16,4,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,16,4,128,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,16,8,128,1,float16,fp8,0,0.016384000579516094
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,16,8,128,1,float16,float16,0,0.015360000232855478
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,16,8,128,1,fp8,fp8,0,0.016048000504573185
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,16,2,128,1,float16,fp8,0,0.015002666662136713
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,16,1,128,1,float16,fp8,0,0.1312373379866282
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,16,1,128,1,float16,float16,0,0.13175466656684875
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,16,1,128,1,fp8,fp8,0,0.145578662554423
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,16,2,128,1,float16,float16,0,0.13277332981427512
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,16,2,128,1,float16,fp8,0,0.13173333803812662
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,16,2,128,1,fp8,fp8,0,0.145578662554423
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,16,4,128,1,float16,float16,0,0.13330666224161783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,16,4,128,1,fp8,fp8,0,0.14778666694959006
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,16,4,128,1,float16,fp8,0,0.13380266229311624
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,16,8,128,1,float16,fp8,0,0.1327786644299825
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,16,8,128,1,float16,float16,0,0.13380799690882364
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,16,8,128,1,fp8,fp8,0,0.14864533146222433
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,16,16,128,1,float16,float16,0,0.07679999868075053
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,16,16,128,1,float16,fp8,0,0.07611200213432312
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,16,16,128,1,fp8,fp8,0,0.08430932958920796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,16,1,128,1,float16,float16,0,0.07543466488520305
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,16,1,128,1,fp8,fp8,0,0.08123733103275299
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,16,2,128,1,float16,fp8,0,0.07436800003051758
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,16,2,128,1,fp8,fp8,0,0.08225599924723308
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,16,2,128,1,float16,float16,0,0.07612266639868419
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,16,4,128,1,fp8,fp8,0,0.08396266897519429
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,16,4,128,1,float16,float16,0,0.07574933270613353
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,16,4,128,1,float16,fp8,0,0.07645866771539052
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,16,1,128,1,float16,fp8,0,0.07611733178297679
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,16,8,128,1,float16,float16,0,0.07577600081761678
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,16,8,128,1,float16,fp8,0,0.07646400233109792
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,16,16,128,1,float16,fp8,0,0.04574400186538696
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,16,1,128,1,float16,float16,0,0.04607999821503957
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,16,16,128,1,fp8,fp8,0,0.050517335534095764
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,16,16,128,1,float16,float16,0,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,16,8,128,1,fp8,fp8,0,0.08430932958920796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,16,1,128,1,fp8,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,16,2,128,1,float16,float16,0,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,16,2,128,1,float16,fp8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,16,2,128,1,fp8,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,16,1,128,1,float16,fp8,0,0.04540266593297323
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,16,4,128,1,float16,float16,0,0.04642133414745331
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,16,4,128,1,fp8,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,16,4,128,1,float16,fp8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,16,8,128,1,float16,float16,0,0.04607999821503957
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,16,8,128,1,float16,fp8,0,0.046762665112813316
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,16,8,128,1,fp8,fp8,0,0.05018133421738943
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,16,16,128,1,float16,float16,0,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,16,16,128,1,fp8,fp8,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,16,1,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,16,2,128,1,float16,float16,0,0.029696000119050343
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,16,1,128,1,float16,float16,0,0.029359998802344005
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,16,2,128,1,float16,fp8,0,0.03108799954255422
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,16,1,128,1,float16,fp8,0,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,16,2,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,16,16,128,1,float16,fp8,0,0.030037333567937214
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,16,4,128,1,float16,float16,0,0.02934933453798294
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,16,4,128,1,float16,fp8,0,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,16,4,128,1,fp8,fp8,0,0.031045332551002502
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,16,8,128,1,float16,fp8,0,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,16,8,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,16,16,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,16,8,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,16,16,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,16,1,128,1,float16,float16,0,0.021168000996112823
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,16,1,128,1,float16,fp8,0,0.021141332884629566
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,16,1,128,1,fp8,fp8,0,0.022858666876951855
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,16,2,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,16,2,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,16,16,128,1,float16,fp8,0,0.021168000996112823
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,16,2,128,1,fp8,fp8,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,16,4,128,1,float16,fp8,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,16,4,128,1,fp8,fp8,0,0.02117866774400075
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,16,4,128,1,float16,float16,0,0.021503999829292297
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,16,8,128,1,float16,float16,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,16,8,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,16,8,128,1,fp8,fp8,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,16,16,128,1,float16,fp8,0,0.018090666582187016
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,16,16,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,16,1,128,1,float16,float16,0,0.01674666628241539
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,16,1,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,16,1,128,1,float16,fp8,0,0.01876266673207283
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,16,2,128,1,float16,float16,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,16,16,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,16,4,128,1,float16,float16,0,0.017082666357358296
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,16,4,128,1,float16,fp8,0,0.017738666385412216
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,16,2,128,1,fp8,fp8,0,0.018805333723624546
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,16,2,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,16,4,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,16,8,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,16,8,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,16,16,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,16,16,128,1,float16,fp8,0,0.016751999656359356
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,16,1,128,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,16,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,16,16,128,1,fp8,fp8,0,0.01670933390657107
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,16,1,128,1,float16,float16,0,0.016037333756685257
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,16,2,128,1,float16,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,16,2,128,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,16,4,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,16,2,128,1,float16,float16,0,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,16,4,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,16,8,128,1,fp8,fp8,0,0.01740266631046931
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,16,4,128,1,fp8,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,16,8,128,1,float16,float16,0,0.015685333559910457
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,16,8,128,1,float16,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,16,16,128,1,float16,float16,0,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,16,1,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,16,1,128,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,16,16,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,16,8,128,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,16,2,128,1,float16,float16,0,0.01637866720557213
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,16,2,128,1,float16,fp8,0,0.016741332908471424
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,16,2,128,1,fp8,fp8,0,0.016704000532627106
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,16,4,128,1,float16,float16,0,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,16,4,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,16,16,128,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,16,1,128,1,float16,float16,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,16,8,128,1,float16,float16,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,16,8,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,16,16,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,16,8,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,16,16,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,16,4,128,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,16,1,128,1,float16,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,16,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,16,2,128,1,float16,float16,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,16,2,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,16,2,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,16,4,128,1,float16,float16,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,16,1,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,16,4,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,16,4,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,16,8,128,1,float16,float16,0,0.01570133368174235
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,16,8,128,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,16,8,128,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,16,16,128,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,12,1,128,1,float16,float16,0,5.7359358469645185
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,12,1,128,1,fp8,fp8,0,4.596735954284668
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,12,1,128,1,float16,fp8,0,5.463205337524414
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,12,2,128,1,fp8,fp8,0,4.613973299662272
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,12,2,128,1,float16,float16,0,5.78441047668457
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,12,2,128,1,float16,fp8,0,5.709664026896159
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,12,4,128,1,float16,float16,0,5.769045511881511
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,12,4,128,1,float16,fp8,0,5.9791520436604815
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,12,1,128,1,float16,float16,0,2.7535413106282554
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,12,1,128,1,float16,fp8,0,2.7600107192993164
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,12,1,128,1,fp8,fp8,0,2.3905173937479653
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,12,2,128,1,float16,float16,0,2.7671947479248047
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,12,12,128,1,float16,float16,0,2.871994654337565
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,12,12,128,1,float16,fp8,0,2.897072156270345
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,12,12,128,1,fp8,fp8,0,2.468005339304606
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,12,2,128,1,fp8,fp8,0,2.3983786900838218
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,12,2,128,1,float16,fp8,0,2.760016123453776
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,12,4,128,1,fp8,fp8,0,4.6419572830200195
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,12,4,128,1,float16,float16,0,2.7818771998087564
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,12,1,128,1,float16,float16,0,1.4666719436645508
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,12,1,128,1,float16,fp8,0,1.4772906303405762
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,12,4,128,1,float16,fp8,0,2.77947203318278
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,12,4,128,1,fp8,fp8,0,2.4117280642191568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,12,1,128,1,fp8,fp8,0,1.296895980834961
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,12,12,128,1,float16,fp8,0,1.5319040616353352
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,12,12,128,1,fp8,fp8,0,1.335301399230957
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,12,12,128,1,float16,float16,0,1.5271040598551433
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,12,2,128,1,float16,fp8,0,1.482752005259196
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,12,2,128,1,float16,float16,0,1.4728533426920574
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,12,2,128,1,fp8,fp8,0,1.3009920120239258
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,12,1,128,1,float16,float16,0,0.8241493701934814
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,12,1,128,1,float16,fp8,0,0.8258612950642904
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,12,4,128,1,float16,float16,0,1.4841066996256511
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,12,12,128,1,float16,float16,0,0.8475306828816732
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,12,4,128,1,fp8,fp8,0,1.3047466278076172
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,12,4,128,1,float16,fp8,0,1.4865066210428874
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,12,12,128,1,float16,fp8,0,0.8604959646860758
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,12,12,128,1,fp8,fp8,0,0.7652693589528402
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,12,1,128,1,fp8,fp8,0,0.7475252946217855
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,12,2,128,1,float16,float16,0,0.8279039859771729
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,12,2,128,1,float16,fp8,0,0.831658681233724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,12,2,128,1,fp8,fp8,0,0.7475199699401855
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,12,4,128,1,float16,float16,0,0.8407039642333984
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,12,4,128,1,float16,fp8,0,0.836949348449707
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,12,4,128,1,fp8,fp8,0,0.7540106773376465
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,12,1,128,1,float16,float16,0,3.2117811838785806
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,12,1,128,1,float16,fp8,0,3.190959930419922
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,12,1,128,1,fp8,fp8,0,2.7852748235066733
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,12,2,128,1,float16,float16,0,3.2158772150675454
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,12,2,128,1,float16,fp8,0,3.222026824951172
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,12,2,128,1,fp8,fp8,0,2.7890294392903647
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,12,4,128,1,float16,float16,0,3.289951960245768
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,12,4,128,1,float16,fp8,0,3.383978525797526
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,12,1,128,1,float16,float16,0,1.6634666124979656
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,12,1,128,1,float16,fp8,0,1.6807252566019695
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,12,1,128,1,fp8,fp8,0,1.4626132647196453
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,12,12,128,1,float16,float16,0,1.7164106369018555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,12,4,128,1,fp8,fp8,0,2.815317471822103
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,12,12,128,1,float16,fp8,0,1.7307359377543132
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,12,2,128,1,float16,float16,0,1.6916426022847493
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,12,12,128,1,fp8,fp8,0,1.5226880709330242
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,12,2,128,1,float16,fp8,0,1.6728960673014324
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,12,2,128,1,fp8,fp8,0,1.468074639638265
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,12,1,128,1,float16,float16,0,0.8994186719258627
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,12,4,128,1,float16,float16,0,1.6810347239176433
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,12,1,128,1,float16,fp8,0,0.9069066842397054
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,12,4,128,1,float16,fp8,0,1.6807039578755696
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,12,12,128,1,float16,float16,0,0.928933302561442
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,12,4,128,1,fp8,fp8,0,1.4803627332051594
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,12,12,128,1,float16,fp8,0,0.9483946959177653
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,12,12,128,1,fp8,fp8,0,0.8362666765848795
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,12,1,128,1,fp8,fp8,0,0.8067413171132406
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,12,2,128,1,float16,float16,0,0.9069226582845052
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,12,2,128,1,fp8,fp8,0,0.8115200201670328
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,12,2,128,1,float16,fp8,0,0.9142613410949707
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,12,4,128,1,float16,float16,0,0.908618688583374
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,12,4,128,1,float16,fp8,0,0.9152533213297526
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,12,12,128,1,float16,float16,0,0.5386186838150024
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,12,4,128,1,fp8,fp8,0,0.8183519840240479
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,12,1,128,1,float16,float16,0,0.5191680192947388
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,12,1,128,1,float16,fp8,0,0.5243146816889445
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,12,12,128,1,float16,fp8,0,0.5434079964955648
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,12,1,128,1,fp8,fp8,0,0.4763253529866536
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,12,12,128,1,fp8,fp8,0,0.494762659072876
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,12,2,128,1,float16,float16,0,0.5208746592203776
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,12,2,128,1,float16,fp8,0,0.5232693354288737
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,12,2,128,1,fp8,fp8,0,0.48110934098561603
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,12,4,128,1,float16,float16,0,0.5253119866053263
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,12,4,128,1,float16,fp8,0,0.5307733217875162
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,12,4,128,1,fp8,fp8,0,0.48421335220336914
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,12,1,128,1,float16,float16,0,2.314570744832357
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,12,1,128,1,float16,fp8,0,2.3244853019714355
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,12,1,128,1,fp8,fp8,0,2.040496031443278
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,12,2,128,1,float16,float16,0,2.335744063059489
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,12,2,128,1,fp8,fp8,0,2.0493653615315757
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,12,2,128,1,float16,fp8,0,2.3639039993286133
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,12,4,128,1,float16,float16,0,2.369370619455973
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,12,4,128,1,float16,fp8,0,2.3545173009236655
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,12,1,128,1,float16,float16,0,1.2113920052846272
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,12,1,128,1,float16,fp8,0,1.2332159678141277
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,12,1,128,1,fp8,fp8,0,1.0825386842091878
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,12,2,128,1,float16,float16,0,1.222656011581421
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,12,12,128,1,float16,fp8,0,1.2788000106811523
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,12,12,128,1,float16,float16,0,1.266858657201131
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,12,4,128,1,fp8,fp8,0,2.0671146710713706
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,12,12,128,1,fp8,fp8,0,1.1316533088684082
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,12,2,128,1,float16,fp8,0,1.2216320037841797
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,12,2,128,1,fp8,fp8,0,1.0890186627705891
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,12,4,128,1,float16,float16,0,1.2414240042368572
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,12,1,128,1,float16,float16,0,0.664576013882955
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,12,1,128,1,float16,fp8,0,0.6714026927947998
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,12,12,128,1,float16,fp8,0,0.6990559895833334
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,12,4,128,1,float16,fp8,0,1.2345919609069824
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,12,12,128,1,float16,float16,0,0.6942613124847412
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,12,4,128,1,fp8,fp8,0,1.09550937016805
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,12,12,128,1,fp8,fp8,0,0.6292479832967123
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,12,1,128,1,fp8,fp8,0,0.6065760056177775
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,12,2,128,1,float16,float16,0,0.6696960131327311
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,12,2,128,1,fp8,fp8,0,0.6072160005569458
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,12,2,128,1,float16,fp8,0,0.6761813163757324
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,12,4,128,1,float16,float16,0,0.6765226523081461
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,12,4,128,1,fp8,fp8,0,0.612005352973938
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,12,12,128,1,float16,float16,0,0.4102826515833537
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,12,4,128,1,float16,fp8,0,0.6778879960378011
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,12,1,128,1,fp8,fp8,0,0.3633493185043335
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,12,12,128,1,fp8,fp8,0,0.37905065218607586
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,12,12,128,1,float16,fp8,0,0.41727999846140545
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,12,1,128,1,float16,fp8,0,0.3973120053609212
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,12,2,128,1,float16,float16,0,0.39628799756368
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,12,2,128,1,fp8,fp8,0,0.3657386700312297
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,12,1,128,1,float16,float16,0,0.3962826728820801
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,12,2,128,1,float16,fp8,0,0.39662400881449383
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,12,4,128,1,float16,float16,0,0.39833064874013263
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,12,4,128,1,float16,fp8,0,0.4010666608810425
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,12,4,128,1,fp8,fp8,0,0.3705173333485921
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,12,1,128,1,float16,fp8,0,3.1145013173421225
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,12,1,128,1,fp8,fp8,0,2.7214508056640625
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,12,1,128,1,float16,float16,0,3.1349865595499673
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,12,2,128,1,float16,fp8,0,3.1138134002685547
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,12,2,128,1,float16,float16,0,3.0950454076131186
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,12,2,128,1,fp8,fp8,0,2.730496088663737
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,12,4,128,1,float16,float16,0,3.118933359781901
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,12,4,128,1,float16,fp8,0,3.16704527537028
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,12,1,128,1,float16,float16,0,1.5819093386332195
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,12,1,128,1,float16,fp8,0,1.585327943166097
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,12,1,128,1,fp8,fp8,0,1.4100534121195476
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,12,2,128,1,float16,float16,0,1.6058026949564617
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,12,12,128,1,float16,float16,0,1.662805398305257
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,12,2,128,1,float16,fp8,0,1.5911307334899902
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,12,12,128,1,fp8,fp8,0,1.4878719647725422
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,12,12,128,1,float16,fp8,0,1.6964267094930012
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,12,4,128,1,fp8,fp8,0,2.7637866338094077
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,12,2,128,1,fp8,fp8,0,1.4124372800191243
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,12,1,128,1,float16,float16,0,0.8420693079630533
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,12,1,128,1,float16,fp8,0,0.8407039642333984
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,12,4,128,1,float16,float16,0,1.6006827354431152
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,12,4,128,1,float16,fp8,0,1.625941276550293
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,12,12,128,1,float16,float16,0,0.8775680065155029
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,12,12,128,1,float16,fp8,0,0.8881493409474691
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,12,12,128,1,fp8,fp8,0,0.8009386857350668
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,12,4,128,1,fp8,fp8,0,1.4291626612345378
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,12,1,128,1,fp8,fp8,0,0.7577653725941976
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,12,2,128,1,float16,float16,0,0.8431093692779541
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,12,2,128,1,float16,fp8,0,0.8444586594899496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,12,2,128,1,fp8,fp8,0,0.7594719727834066
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,12,1,128,1,float16,float16,0,0.4664320151011149
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,12,4,128,1,float16,float16,0,0.8506080309549967
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,12,12,128,1,float16,float16,0,0.49001065889994305
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,12,4,128,1,float16,fp8,0,0.8557226657867432
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,12,12,128,1,float16,fp8,0,0.49578134218851727
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,12,4,128,1,fp8,fp8,0,0.7717599868774414
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,12,12,128,1,fp8,fp8,0,0.4524373213450114
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,12,1,128,1,float16,fp8,0,0.46882132689158124
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,12,1,128,1,fp8,fp8,0,0.43230398495992023
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,12,2,128,1,float16,float16,0,0.4694986740748088
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,12,2,128,1,float16,fp8,0,0.47086934248606366
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,12,2,128,1,fp8,fp8,0,0.4336693286895752
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,12,4,128,1,float16,float16,0,0.4753013451894124
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,12,4,128,1,float16,fp8,0,0.48006399472554523
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,12,4,128,1,fp8,fp8,0,0.4384426673253377
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,12,12,128,1,float16,float16,0,0.29782400528589886
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,12,1,128,1,float16,float16,0,0.2879146734873454
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,12,1,128,1,float16,fp8,0,0.2872320016225179
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,12,1,128,1,fp8,fp8,0,0.2650453249613444
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,12,12,128,1,float16,fp8,0,0.2995199958483378
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,12,12,128,1,fp8,fp8,0,0.27698665857315063
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,12,2,128,1,float16,fp8,0,0.29064534107844037
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,12,2,128,1,float16,float16,0,0.2858720024426778
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,12,2,128,1,fp8,fp8,0,0.2657279968261719
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,12,4,128,1,float16,float16,0,0.2892853418986003
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,12,4,128,1,float16,fp8,0,0.2892799973487854
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,12,4,128,1,fp8,fp8,0,0.26743467648824054
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,12,1,128,1,float16,float16,0,1.89901336034139
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,12,1,128,1,float16,fp8,0,1.902389367421468
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,12,1,128,1,fp8,fp8,0,1.7099092801411946
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,12,2,128,1,float16,float16,0,1.9426933924357097
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,12,2,128,1,float16,fp8,0,1.9365545908610027
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,12,2,128,1,fp8,fp8,0,1.7181013425191243
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,12,4,128,1,float16,float16,0,1.9406506220499675
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,12,4,128,1,float16,fp8,0,1.9454293251037598
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,12,1,128,1,float16,float16,0,0.9954933325449625
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,12,1,128,1,float16,fp8,0,0.9896960258483887
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,12,1,128,1,fp8,fp8,0,0.8953119913736979
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,12,2,128,1,float16,float16,0,0.9958453178405762
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,12,12,128,1,float16,float16,0,1.0443092981974285
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,12,12,128,1,float16,fp8,0,1.0548906326293945
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,12,4,128,1,fp8,fp8,0,1.7427093187967937
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,12,12,128,1,fp8,fp8,0,0.9582826296488444
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,12,2,128,1,float16,fp8,0,1.00437331199646
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,12,2,128,1,fp8,fp8,0,0.899072011311849
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,12,1,128,1,float16,float16,0,0.5331626733144125
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,12,1,128,1,float16,fp8,0,0.5307733217875162
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,12,4,128,1,float16,float16,0,1.0067520141601562
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,12,12,128,1,float16,float16,0,0.5608106851577759
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,12,4,128,1,float16,fp8,0,1.0091573397318523
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,12,4,128,1,fp8,fp8,0,0.9106773535410563
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,12,12,128,1,float16,fp8,0,0.5669546524683634
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,12,12,128,1,fp8,fp8,0,0.5213813384373983
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,12,1,128,1,fp8,fp8,0,0.48930132389068604
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,12,2,128,1,float16,float16,0,0.5362133185068766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,12,2,128,1,float16,fp8,0,0.535210649172465
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,12,2,128,1,fp8,fp8,0,0.49032533168792725
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,12,4,128,1,float16,float16,0,0.5410133202870687
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,12,4,128,1,float16,fp8,0,0.5427199999491373
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,12,4,128,1,fp8,fp8,0,0.4978346824645996
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,12,1,128,1,float16,float16,0,0.30293333530426025
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,12,12,128,1,float16,fp8,0,0.3274986743927002
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,12,12,128,1,float16,float16,0,0.3258026639620463
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,12,1,128,1,float16,fp8,0,0.30430400371551514
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,12,12,128,1,fp8,fp8,0,0.3036160071690877
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,12,1,128,1,fp8,fp8,0,0.2811200022697449
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,12,2,128,1,float16,float16,0,0.3063466747601827
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,12,2,128,1,float16,fp8,0,0.30668799082438153
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,12,2,128,1,fp8,fp8,0,0.285866657892863
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,12,4,128,1,float16,float16,0,0.31112533807754517
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,12,4,128,1,float16,fp8,0,0.31249600648880005
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,12,4,128,1,fp8,fp8,0,0.29100799560546875
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,12,12,128,1,float16,float16,0,0.20514132579167685
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,12,1,128,1,float16,float16,0,0.20070934295654297
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,12,1,128,1,float16,fp8,0,0.20069867372512817
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,12,12,128,1,fp8,fp8,0,0.1945599913597107
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,12,12,128,1,float16,fp8,0,0.20787199338277182
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,12,1,128,1,fp8,fp8,0,0.18639467159907022
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,12,2,128,1,float16,float16,0,0.2010506590207418
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,12,2,128,1,fp8,fp8,0,0.1868799924850464
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,12,2,128,1,float16,fp8,0,0.20325867335001627
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,12,4,128,1,fp8,fp8,0,0.18705066045125326
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,12,4,128,1,float16,fp8,0,0.20292266209920248
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,12,4,128,1,float16,float16,0,0.20036800702412924
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,12,1,128,1,float16,float16,0,1.9752960205078125
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,12,1,128,1,float16,fp8,0,1.98963197072347
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,12,1,128,1,fp8,fp8,0,1.834837277730306
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,12,2,128,1,float16,float16,0,1.9848480224609375
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,12,2,128,1,float16,fp8,0,1.9848586718241374
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,12,2,128,1,fp8,fp8,0,1.8300693829854329
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,12,4,128,1,float16,float16,0,2.01147731145223
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,12,4,128,1,float16,fp8,0,2.005669275919596
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,12,1,128,1,float16,float16,0,1.0077919960021973
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,12,1,128,1,float16,fp8,0,1.0064213275909424
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,12,1,128,1,fp8,fp8,0,0.9357653458913168
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,12,2,128,1,float16,float16,0,1.0101706981658936
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,12,12,128,1,float16,float16,0,1.0883413155873616
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,12,12,128,1,float16,fp8,0,1.1047253608703613
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,12,4,128,1,fp8,fp8,0,1.8629973729451497
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,12,2,128,1,float16,fp8,0,1.014954646428426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,12,12,128,1,fp8,fp8,0,1.0108373165130615
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,12,2,128,1,fp8,fp8,0,0.9388480186462402
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,12,1,128,1,float16,float16,0,0.5341920057932535
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,12,1,128,1,float16,fp8,0,0.5318026542663574
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,12,4,128,1,float16,float16,0,1.0262133280436199
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,12,4,128,1,float16,fp8,0,1.0269013245900471
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,12,12,128,1,float16,fp8,0,0.5751519997914633
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,12,12,128,1,float16,float16,0,0.5744693279266357
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,12,4,128,1,fp8,fp8,0,0.9545333385467529
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,12,12,128,1,fp8,fp8,0,0.5379360119501749
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,12,1,128,1,fp8,fp8,0,0.4971359968185425
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,12,2,128,1,float16,float16,0,0.5341866811116537
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,12,2,128,1,float16,fp8,0,0.5362346569697062
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,12,2,128,1,fp8,fp8,0,0.5005653301874796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,12,4,128,1,float16,float16,0,0.5413599809010824
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,12,1,128,1,float16,float16,0,0.2950826684633891
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,12,4,128,1,float16,fp8,0,0.5488586823145548
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,12,1,128,1,float16,fp8,0,0.2947360078493754
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,12,4,128,1,fp8,fp8,0,0.5073920090993246
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,12,12,128,1,float16,float16,0,0.3158986568450928
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,12,12,128,1,float16,fp8,0,0.3206826647122701
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,12,12,128,1,fp8,fp8,0,0.3002026677131653
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,12,1,128,1,fp8,fp8,0,0.2786933382352193
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,12,2,128,1,float16,float16,0,0.29781333605448407
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,12,2,128,1,fp8,fp8,0,0.28040534257888794
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,12,2,128,1,float16,fp8,0,0.29920534292856854
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,12,4,128,1,float16,float16,0,0.3025919993718465
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,12,4,128,1,float16,fp8,0,0.304639995098114
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,12,12,128,1,float16,float16,0,0.19012266397476196
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,12,4,128,1,fp8,fp8,0,0.2851840058962504
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,12,1,128,1,float16,float16,0,0.17749333381652832
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,12,1,128,1,float16,fp8,0,0.17834667364756265
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,12,12,128,1,float16,fp8,0,0.19233600298563638
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,12,12,128,1,fp8,fp8,0,0.1812480092048645
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,12,1,128,1,fp8,fp8,0,0.16366933782895407
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,12,2,128,1,float16,fp8,0,0.18056533734003702
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,12,2,128,1,fp8,fp8,0,0.16605866948763529
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,12,2,128,1,float16,float16,0,0.17834667364756265
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,12,4,128,1,float16,float16,0,0.17869333426157633
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,12,1,128,1,float16,float16,0,0.12151466806729634
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,12,1,128,1,float16,fp8,0,0.12148800492286682
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,12,12,128,1,float16,float16,0,0.1225386659304301
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,12,4,128,1,float16,fp8,0,0.18127999703089395
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,12,12,128,1,float16,fp8,0,0.12595199545224509
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,12,12,128,1,fp8,fp8,0,0.11707733074824016
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,12,1,128,1,fp8,fp8,0,0.11332799990971883
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,12,4,128,1,fp8,fp8,0,0.16964799165725708
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,12,2,128,1,float16,float16,0,0.12151466806729634
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,12,2,128,1,fp8,fp8,0,0.11400000254313152
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,12,2,128,1,float16,fp8,0,0.12154133121172588
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,12,4,128,1,float16,float16,0,0.12151466806729634
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,12,4,128,1,float16,fp8,0,0.12322133779525757
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,12,4,128,1,fp8,fp8,0,0.11332266529401143
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,12,1,128,1,float16,float16,0,1.2999520301818848
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,12,1,128,1,float16,fp8,0,1.3034079869588215
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,12,1,128,1,fp8,fp8,0,1.2202719847361247
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,12,2,128,1,float16,float16,0,1.3085013230641682
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,12,2,128,1,float16,fp8,0,1.3054453531901042
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,12,2,128,1,fp8,fp8,0,1.2448426882425945
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,12,4,128,1,float16,float16,0,1.326416015625
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,12,4,128,1,float16,fp8,0,1.3257280190785725
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,12,1,128,1,float16,float16,0,0.6683306694030762
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,12,1,128,1,float16,fp8,0,0.6775519847869873
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,12,1,128,1,fp8,fp8,0,0.6330133279164633
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,12,2,128,1,float16,float16,0,0.6737973690032959
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,12,12,128,1,float16,float16,0,0.7215786774953207
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,12,12,128,1,float16,fp8,0,0.7246507008870443
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,12,4,128,1,fp8,fp8,0,1.252010663350423
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,12,2,128,1,float16,fp8,0,0.67413330078125
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,12,12,128,1,fp8,fp8,0,0.6915413538614908
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,12,2,128,1,fp8,fp8,0,0.6422186692555746
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,12,1,128,1,float16,float16,0,0.35925865173339844
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,12,4,128,1,float16,float16,0,0.6850506464640299
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,12,1,128,1,float16,fp8,0,0.3616480032602946
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,12,4,128,1,float16,fp8,0,0.6864213148752848
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,12,12,128,1,float16,float16,0,0.3845173517862956
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,12,12,128,1,float16,fp8,0,0.3915146589279175
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,12,4,128,1,fp8,fp8,0,0.648698647816976
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,12,12,128,1,fp8,fp8,0,0.37086931864420575
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,12,1,128,1,fp8,fp8,0,0.3404800097147624
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,12,2,128,1,float16,float16,0,0.36266132195790607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,12,2,128,1,float16,fp8,0,0.36505599816640216
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,12,2,128,1,fp8,fp8,0,0.3432106574376424
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,12,4,128,1,float16,float16,0,0.3677866856257121
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,12,12,128,1,float16,float16,0,0.22050132354100546
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,12,4,128,1,float16,fp8,0,0.368127981821696
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,12,4,128,1,fp8,fp8,0,0.34935466448465985
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,12,12,128,1,float16,fp8,0,0.22220265865325928
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,12,1,128,1,float16,float16,0,0.20041600863138834
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,12,1,128,1,float16,fp8,0,0.20086934169133505
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,12,12,128,1,fp8,fp8,0,0.21094934145609537
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,12,1,128,1,fp8,fp8,0,0.18943999210993448
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,12,2,128,1,float16,float16,0,0.2034346659978231
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,12,2,128,1,float16,fp8,0,0.2027413249015808
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,12,2,128,1,fp8,fp8,0,0.19490132729212442
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,12,4,128,1,float16,float16,0,0.20890132586161295
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,12,1,128,1,float16,float16,0,0.12732266386349997
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,12,4,128,1,float16,fp8,0,0.21025600035985312
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,12,1,128,1,fp8,fp8,0,0.11844266454378764
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,12,1,128,1,float16,fp8,0,0.1262986660003662
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,12,4,128,1,fp8,fp8,0,0.19831999142964682
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,12,12,128,1,float16,float16,0,0.1346506675084432
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,12,12,128,1,float16,fp8,0,0.13550933202107748
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,12,12,128,1,fp8,fp8,0,0.13056000073750815
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,12,2,128,1,float16,float16,0,0.1276586651802063
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,12,2,128,1,fp8,fp8,0,0.11947199702262878
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,12,12,128,1,float16,float16,0,0.08328000207742055
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,12,2,128,1,float16,fp8,0,0.1283466617266337
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,12,12,128,1,float16,fp8,0,0.08363733688990276
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,12,4,128,1,float16,float16,0,0.12902399897575378
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,12,4,128,1,float16,fp8,0,0.12833600242932638
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,12,4,128,1,fp8,fp8,0,0.1204906702041626
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,12,1,128,1,float16,float16,0,0.08055999875068665
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,12,2,128,1,float16,float16,0,0.08054933448632558
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,12,1,128,1,float16,fp8,0,0.08258666594823201
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,12,2,128,1,float16,fp8,0,0.08226666847864787
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,12,2,128,1,fp8,fp8,0,0.07645866771539052
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,12,4,128,1,float16,float16,0,0.08123733103275299
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,12,1,128,1,fp8,fp8,0,0.07646400233109792
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,12,12,128,1,fp8,fp8,0,0.07850133379300435
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,12,4,128,1,float16,fp8,0,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,12,4,128,1,fp8,fp8,0,0.07611733178297679
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,12,1,128,1,float16,fp8,0,1.4888960520426433
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,12,1,128,1,float16,float16,0,1.4789973894755046
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,12,1,128,1,fp8,fp8,0,1.4407679239908855
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,12,2,128,1,float16,float16,0,1.4991413752237956
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,12,2,128,1,float16,fp8,0,1.5155199368794758
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,12,2,128,1,fp8,fp8,0,1.4535679817199707
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,12,4,128,1,float16,float16,0,1.516544024149577
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,12,4,128,1,float16,fp8,0,1.516208012898763
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,12,1,128,1,float16,float16,0,0.7652746836344401
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,12,1,128,1,float16,fp8,0,0.7550613085428873
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,12,1,128,1,fp8,fp8,0,0.7376213073730469
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,12,2,128,1,float16,float16,0,0.7584479649861654
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,12,12,128,1,float16,float16,0,0.8145919640858968
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,12,12,128,1,float16,fp8,0,0.8285866578420004
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,12,4,128,1,fp8,fp8,0,1.4868480364481609
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,12,12,128,1,fp8,fp8,0,0.8125653266906738
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,12,2,128,1,float16,fp8,0,0.7703893184661865
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,12,2,128,1,fp8,fp8,0,0.7396693229675293
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,12,4,128,1,float16,float16,0,0.7703893184661865
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,12,1,128,1,float16,float16,0,0.39764801661173504
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,12,1,128,1,float16,fp8,0,0.3935626745223999
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,12,4,128,1,float16,fp8,0,0.7707306543986002
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,12,4,128,1,fp8,fp8,0,0.7529813448588053
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,12,12,128,1,float16,float16,0,0.4261546532313029
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,12,12,128,1,float16,fp8,0,0.43332799275716144
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,12,1,128,1,fp8,fp8,0,0.3851999839146932
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,12,12,128,1,fp8,fp8,0,0.4254719813664754
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,12,2,128,1,float16,float16,0,0.401039997736613
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,12,2,128,1,fp8,fp8,0,0.3861973285675049
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,12,2,128,1,float16,fp8,0,0.3983360131581624
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,12,1,128,1,float16,float16,0,0.2133386731147766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,12,12,128,1,float16,float16,0,0.23176532983779907
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,12,4,128,1,float16,float16,0,0.4024266799290975
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,12,4,128,1,float16,fp8,0,0.40584532419840497
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,12,4,128,1,fp8,fp8,0,0.39628799756368
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,12,1,128,1,float16,fp8,0,0.21230934063593546
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,12,12,128,1,float16,fp8,0,0.2385973334312439
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,12,12,128,1,fp8,fp8,0,0.23175466060638428
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,12,1,128,1,fp8,fp8,0,0.21128533283869425
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,12,2,128,1,float16,float16,0,0.21572266022364298
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,12,2,128,1,fp8,fp8,0,0.21093332767486572
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,12,2,128,1,float16,fp8,0,0.21708800395329794
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,12,4,128,1,float16,float16,0,0.22016533215840658
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,12,4,128,1,fp8,fp8,0,0.21740800142288208
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,12,4,128,1,float16,fp8,0,0.22187199195226034
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,12,12,128,1,float16,float16,0,0.13380799690882364
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,12,1,128,1,float16,float16,0,0.12219199538230896
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,12,1,128,1,float16,fp8,0,0.1225386659304301
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,12,2,128,1,float16,float16,0,0.1225440005461375
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,12,1,128,1,fp8,fp8,0,0.11809600392977397
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,12,12,128,1,float16,fp8,0,0.13635733723640442
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,12,12,128,1,fp8,fp8,0,0.13379733761151633
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,12,2,128,1,float16,fp8,0,0.12150933345158894
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,12,2,128,1,fp8,fp8,0,0.11980799833933513
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,12,4,128,1,float16,float16,0,0.12356799840927124
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,12,4,128,1,float16,fp8,0,0.12390399972597758
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,12,4,128,1,fp8,fp8,0,0.12151466806729634
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,12,12,128,1,float16,fp8,0,0.0846453309059143
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,12,12,128,1,float16,float16,0,0.08225599924723308
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,12,1,128,1,float16,float16,0,0.08055999875068665
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,12,1,128,1,float16,fp8,0,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,12,12,128,1,fp8,fp8,0,0.08090133468310039
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,12,1,128,1,fp8,fp8,0,0.07577600081761678
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,12,2,128,1,fp8,fp8,0,0.07542933523654938
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,12,2,128,1,float16,fp8,0,0.08160000046094258
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,12,2,128,1,float16,float16,0,0.08089600006739299
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,12,4,128,1,float16,float16,0,0.08157866696516673
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,12,4,128,1,fp8,fp8,0,0.077824001510938
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,12,12,128,1,float16,float16,0,0.05973866581916809
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,12,4,128,1,float16,fp8,0,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,12,1,128,1,float16,float16,0,0.06006933252016703
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,12,12,128,1,fp8,fp8,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,12,1,128,1,float16,fp8,0,0.05972800155480703
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,12,12,128,1,float16,fp8,0,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,12,2,128,1,float16,fp8,0,0.05974400043487549
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,12,1,128,1,fp8,fp8,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,12,4,128,1,float16,float16,0,0.06006933252016703
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,12,4,128,1,float16,fp8,0,0.060415998101234436
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,12,2,128,1,fp8,fp8,0,0.05563200016816457
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,12,4,128,1,fp8,fp8,0,0.05734399954477946
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,12,2,128,1,float16,float16,0,0.06006933252016703
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,12,1,128,1,float16,float16,0,1.0613760153452556
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,12,1,128,1,float16,fp8,0,1.0620640118916829
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,12,1,128,1,fp8,fp8,0,1.050112009048462
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,12,2,128,1,float16,float16,0,1.0709280172983806
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,12,2,128,1,float16,fp8,0,1.0712746779123943
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,12,2,128,1,fp8,fp8,0,1.0528426965077717
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,12,4,128,1,float16,float16,0,1.0801493326822917
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,12,4,128,1,float16,fp8,0,1.0978986422220867
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,12,1,128,1,float16,float16,0,0.5334879954655966
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,12,1,128,1,float16,fp8,0,0.5386240084966024
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,12,1,128,1,fp8,fp8,0,0.532480001449585
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,12,2,128,1,float16,float16,0,0.5430613358815511
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,12,12,128,1,float16,float16,0,0.5884639819463094
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,12,12,128,1,float16,fp8,0,0.5932373205820719
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,12,4,128,1,fp8,fp8,0,1.0736587047576904
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,12,12,128,1,fp8,fp8,0,0.5935680071512858
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,12,2,128,1,float16,fp8,0,0.5396426518758138
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,12,2,128,1,fp8,fp8,0,0.5355573495229086
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,12,4,128,1,float16,float16,0,0.5492053429285685
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,12,1,128,1,float16,float16,0,0.28278932968775433
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,12,1,128,1,float16,fp8,0,0.28245333830515545
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,12,12,128,1,float16,float16,0,0.3097599943478902
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,12,4,128,1,fp8,fp8,0,0.5505653222401937
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,12,4,128,1,float16,fp8,0,0.5577439864476522
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,12,12,128,1,float16,fp8,0,0.31249066193898517
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,12,12,128,1,fp8,fp8,0,0.31279999017715454
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,12,1,128,1,fp8,fp8,0,0.2821120023727417
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,12,2,128,1,float16,float16,0,0.2842026750246684
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,12,2,128,1,float16,fp8,0,0.2865599989891052
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,12,2,128,1,fp8,fp8,0,0.28722665707270306
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,12,4,128,1,float16,float16,0,0.29099200169245404
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,12,4,128,1,float16,fp8,0,0.29474133253097534
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,12,4,128,1,fp8,fp8,0,0.29234667619069415
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,12,12,128,1,float16,float16,0,0.17169066270192465
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,12,12,128,1,float16,fp8,0,0.17390932639439902
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,12,1,128,1,float16,float16,0,0.1520693302154541
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,12,1,128,1,float16,fp8,0,0.15377599994341531
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,12,12,128,1,fp8,fp8,0,0.17390400171279907
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,12,1,128,1,fp8,fp8,0,0.1518933375676473
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,12,2,128,1,float16,float16,0,0.1558133363723755
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,12,2,128,1,float16,fp8,0,0.15478932857513428
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,12,2,128,1,fp8,fp8,0,0.15837867061297098
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,12,4,128,1,float16,float16,0,0.16059733430544534
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,12,4,128,1,float16,fp8,0,0.16076266765594482
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,12,12,128,1,float16,float16,0,0.0993280013402303
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,12,4,128,1,fp8,fp8,0,0.16077333688735962
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,12,12,128,1,float16,fp8,0,0.10274133086204529
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,12,1,128,1,float16,float16,0,0.09387200077374776
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,12,1,128,1,float16,fp8,0,0.09454400340716045
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,12,1,128,1,fp8,fp8,0,0.08873066306114197
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,12,12,128,1,fp8,fp8,0,0.10308800141016643
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,12,2,128,1,float16,fp8,0,0.09283733367919922
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,12,2,128,1,float16,float16,0,0.0942080020904541
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,12,2,128,1,fp8,fp8,0,0.09011200070381165
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,12,4,128,1,float16,float16,0,0.0942133367061615
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,12,4,128,1,float16,fp8,0,0.09591466188430786
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,12,12,128,1,float16,float16,0,0.060415998101234436
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,12,4,128,1,fp8,fp8,0,0.09215999643007915
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,12,1,128,1,float16,fp8,0,0.05972800155480703
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,12,1,128,1,fp8,fp8,0,0.05529599885145823
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,12,1,128,1,float16,float16,0,0.05804799993832906
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,12,12,128,1,fp8,fp8,0,0.05734399954477946
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,12,12,128,1,float16,fp8,0,0.06178666651248932
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,12,2,128,1,float16,float16,0,0.05799466868241628
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,12,2,128,1,float16,fp8,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,12,2,128,1,fp8,fp8,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,12,4,128,1,float16,float16,0,0.05973866581916809
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,12,4,128,1,float16,fp8,0,0.05972800155480703
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,12,4,128,1,fp8,fp8,0,0.05495999753475189
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,12,12,128,1,float16,float16,0,0.05154666801293691
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,12,12,128,1,fp8,fp8,0,0.049498667319615684
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,12,1,128,1,float16,fp8,0,0.04911999901135763
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,12,1,128,1,fp8,fp8,0,0.04573333263397217
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,12,2,128,1,float16,float16,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,12,12,128,1,float16,fp8,0,0.04981866478919983
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,12,2,128,1,fp8,fp8,0,0.0484746644894282
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,12,2,128,1,float16,fp8,0,0.04980800052483877
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,12,4,128,1,float16,float16,0,0.049839998284975685
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,12,4,128,1,float16,fp8,0,0.04948799808820089
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,12,1,128,1,float16,float16,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,12,4,128,1,fp8,fp8,0,0.04577066500981649
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,12,1,128,1,float16,float16,0,1.1436373392740886
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,12,1,128,1,fp8,fp8,0,1.1933013598124187
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,12,1,128,1,float16,fp8,0,1.1508053143819172
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,12,2,128,1,float16,float16,0,1.1600159804026287
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,12,2,128,1,float16,fp8,0,1.1572960217793782
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,12,2,128,1,fp8,fp8,0,1.2455253601074219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,12,4,128,1,float16,fp8,0,1.1719679832458496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,12,4,128,1,float16,float16,0,1.1719679832458496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,12,1,128,1,float16,fp8,0,0.579584002494812
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,12,1,128,1,float16,float16,0,0.5812906821568807
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,12,1,128,1,fp8,fp8,0,0.6096213261286417
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,12,2,128,1,float16,float16,0,0.5905280113220215
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,12,12,128,1,float16,float16,0,0.6398346821467081
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,12,12,128,1,float16,fp8,0,0.6350506544113159
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,12,4,128,1,fp8,fp8,0,1.2678826649983723
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,12,12,128,1,fp8,fp8,0,0.6696960131327311
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,12,2,128,1,fp8,fp8,0,0.6287360191345215
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,12,2,128,1,float16,fp8,0,0.5894826650619507
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,12,4,128,1,float16,float16,0,0.5997173388799032
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,12,1,128,1,float16,float16,0,0.2988106608390808
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,12,1,128,1,float16,fp8,0,0.30088533957799274
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,12,4,128,1,float16,fp8,0,0.5935893456141154
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,12,4,128,1,fp8,fp8,0,0.6299360195795695
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,12,12,128,1,float16,float16,0,0.33535468578338623
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,12,12,128,1,float16,fp8,0,0.32614399989446
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,12,1,128,1,fp8,fp8,0,0.3145386576652527
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,12,12,128,1,fp8,fp8,0,0.3456000089645386
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,12,2,128,1,float16,float16,0,0.30498133103052777
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,12,2,128,1,float16,fp8,0,0.30396799246470135
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,12,2,128,1,fp8,fp8,0,0.32201600074768066
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,12,1,128,1,float16,float16,0,0.15981866916020712
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,12,4,128,1,float16,float16,0,0.30873600641886395
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,12,12,128,1,float16,float16,0,0.1795360048611959
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,12,4,128,1,fp8,fp8,0,0.32443199555079144
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,12,4,128,1,float16,fp8,0,0.31143999099731445
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,12,12,128,1,float16,fp8,0,0.17614932854970297
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,12,12,128,1,fp8,fp8,0,0.18448533614476523
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,12,1,128,1,float16,fp8,0,0.16196266810099283
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,12,1,128,1,fp8,fp8,0,0.16571733355522156
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,12,2,128,1,float16,float16,0,0.16400532921155295
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,12,2,128,1,fp8,fp8,0,0.17101333538691202
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,12,2,128,1,float16,fp8,0,0.164000004529953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,12,4,128,1,float16,fp8,0,0.16606400410334268
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,12,12,128,1,float16,float16,0,0.10035199920336406
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,12,4,128,1,float16,float16,0,0.167087992032369
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,12,4,128,1,fp8,fp8,0,0.17253865798314413
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,12,12,128,1,float16,fp8,0,0.1013706624507904
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,12,1,128,1,float16,float16,0,0.09250133236249287
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,12,1,128,1,float16,fp8,0,0.09150399764378865
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,12,1,128,1,fp8,fp8,0,0.09145067135492961
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,12,2,128,1,float16,float16,0,0.09250133236249287
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,12,12,128,1,fp8,fp8,0,0.10479467113812764
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,12,2,128,1,fp8,fp8,0,0.09249066313107808
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,12,2,128,1,float16,fp8,0,0.09284800291061401
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,12,4,128,1,float16,float16,0,0.09283733367919922
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,12,1,128,1,float16,float16,0,0.05871466795603434
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,12,4,128,1,float16,fp8,0,0.09418666362762451
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,12,1,128,1,float16,fp8,0,0.05836800237496694
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,12,12,128,1,float16,float16,0,0.060415998101234436
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,12,4,128,1,fp8,fp8,0,0.09522666533788045
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,12,1,128,1,fp8,fp8,0,0.05699733396371206
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,12,12,128,1,float16,fp8,0,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,12,12,128,1,fp8,fp8,0,0.06075733403364817
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,12,2,128,1,float16,float16,0,0.05905066430568695
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,12,2,128,1,fp8,fp8,0,0.05700799822807312
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,12,2,128,1,float16,fp8,0,0.058703998724619545
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,12,4,128,1,fp8,fp8,0,0.057002668579419456
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,12,4,128,1,float16,float16,0,0.05836800237496694
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,12,4,128,1,float16,fp8,0,0.0576800008614858
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,12,12,128,1,float16,float16,0,0.03924266745646795
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,12,12,128,1,float16,fp8,0,0.03993066648642222
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,12,12,128,1,fp8,fp8,0,0.038912000755469
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,12,1,128,1,float16,float16,0,0.03926933308442434
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,12,1,128,1,fp8,fp8,0,0.03583999971548716
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,12,2,128,1,float16,float16,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,12,1,128,1,float16,fp8,0,0.03822933385769526
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,12,2,128,1,float16,fp8,0,0.03823466598987579
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,12,4,128,1,float16,fp8,0,0.039247999588648476
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,12,2,128,1,fp8,fp8,0,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,12,4,128,1,float16,float16,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,12,1,128,1,float16,float16,0,0.03549333413441976
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,12,12,128,1,float16,float16,0,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,12,12,128,1,float16,fp8,0,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,12,1,128,1,float16,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,12,4,128,1,fp8,fp8,0,0.03893866638342539
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,12,12,128,1,fp8,fp8,0,0.03449599941571554
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,12,1,128,1,fp8,fp8,0,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,12,2,128,1,float16,float16,0,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,12,2,128,1,float16,fp8,0,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,12,4,128,1,float16,float16,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,12,2,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,12,4,128,1,float16,fp8,0,0.03719466676314672
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,12,4,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,12,1,128,1,float16,float16,0,1.0036906401316326
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,12,1,128,1,fp8,fp8,0,1.0682079792022705
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,12,1,128,1,float16,fp8,0,1.0036640167236328
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,12,2,128,1,float16,float16,0,1.023146629333496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,12,2,128,1,fp8,fp8,0,1.1194079717000325
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,12,2,128,1,float16,fp8,0,1.0183680057525635
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,12,4,128,1,float16,float16,0,1.0350933074951172
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,12,4,128,1,float16,fp8,0,1.0340639750162761
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,12,1,128,1,float16,float16,0,0.5128533442815145
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,12,1,128,1,float16,fp8,0,0.5090933243433634
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,12,1,128,1,fp8,fp8,0,0.5434026718139648
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,12,12,128,1,float16,float16,0,0.5676373243331909
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,12,12,128,1,float16,fp8,0,0.5587626695632935
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,12,2,128,1,float16,float16,0,0.5235999822616577
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,12,2,128,1,float16,fp8,0,0.5182826519012451
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,12,4,128,1,fp8,fp8,0,1.1426080067952473
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,12,2,128,1,fp8,fp8,0,0.5642026662826538
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,12,12,128,1,fp8,fp8,0,0.609279990196228
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,12,1,128,1,float16,float16,0,0.2650453249613444
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,12,4,128,1,float16,float16,0,0.527018666267395
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,12,1,128,1,float16,fp8,0,0.2643679976463318
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,12,4,128,1,float16,fp8,0,0.5239413181940714
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,12,4,128,1,fp8,fp8,0,0.5666133165359497
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,12,12,128,1,float16,fp8,0,0.29099200169245404
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,12,12,128,1,fp8,fp8,0,0.31454400221506756
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,12,12,128,1,float16,float16,0,0.29815467198689777
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,12,1,128,1,fp8,fp8,0,0.2821120023727417
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,12,2,128,1,float16,float16,0,0.26982933282852173
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,12,2,128,1,float16,fp8,0,0.2688053250312805
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,12,1,128,1,float16,float16,0,0.14148799578348795
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,12,2,128,1,fp8,fp8,0,0.28962133328119916
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,12,4,128,1,float16,float16,0,0.27222933371861774
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,12,4,128,1,float16,fp8,0,0.27186665932337445
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,12,4,128,1,fp8,fp8,0,0.2940586606661479
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,12,12,128,1,float16,fp8,0,0.1564959983030955
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,12,12,128,1,fp8,fp8,0,0.16708266735076904
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,12,12,128,1,float16,float16,0,0.16059199968973795
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,12,1,128,1,float16,fp8,0,0.14200533429781595
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,12,1,128,1,fp8,fp8,0,0.14847466349601746
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,12,2,128,1,float16,float16,0,0.1460906664530436
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,12,2,128,1,float16,fp8,0,0.1430186629295349
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,12,2,128,1,fp8,fp8,0,0.15310933192571005
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,12,4,128,1,float16,fp8,0,0.14677866299947104
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,12,4,128,1,float16,float16,0,0.1455733378728231
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,12,4,128,1,fp8,fp8,0,0.15872533122698465
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,12,12,128,1,float16,float16,0,0.09010666608810425
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,12,12,128,1,float16,fp8,0,0.08771733442942302
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,12,1,128,1,float16,float16,0,0.08158400158087413
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,12,1,128,1,float16,fp8,0,0.0802346666653951
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,12,1,128,1,fp8,fp8,0,0.08328533172607422
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,12,12,128,1,fp8,fp8,0,0.09589333335558574
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,12,2,128,1,float16,float16,0,0.08090133468310039
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,12,2,128,1,fp8,fp8,0,0.08329066634178162
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,12,2,128,1,float16,fp8,0,0.08192533254623413
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,12,4,128,1,float16,float16,0,0.08123200138409932
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,12,4,128,1,float16,fp8,0,0.08226666847864787
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,12,12,128,1,float16,float16,0,0.05392533540725708
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,12,4,128,1,fp8,fp8,0,0.08635200063387553
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,12,12,128,1,float16,fp8,0,0.052906667192777
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,12,1,128,1,float16,float16,0,0.050853331883748375
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,12,12,128,1,fp8,fp8,0,0.05364799996217092
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,12,1,128,1,float16,fp8,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,12,2,128,1,float16,fp8,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,12,2,128,1,float16,float16,0,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,12,1,128,1,fp8,fp8,0,0.05085866649945577
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,12,4,128,1,float16,float16,0,0.05256533126036326
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,12,4,128,1,float16,fp8,0,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,12,2,128,1,fp8,fp8,0,0.05188799897829691
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,12,4,128,1,fp8,fp8,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,12,12,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,12,1,128,1,float16,fp8,0,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,12,12,128,1,float16,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,12,1,128,1,float16,float16,0,0.03345600018898646
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,12,12,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,12,1,128,1,fp8,fp8,0,0.031744000812371574
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,12,2,128,1,float16,fp8,0,0.0341333324710528
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,12,2,128,1,float16,float16,0,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,12,2,128,1,fp8,fp8,0,0.032416000962257385
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,12,4,128,1,float16,float16,0,0.0334346666932106
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,12,4,128,1,float16,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,12,4,128,1,fp8,fp8,0,0.03448000053564707
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,12,12,128,1,float16,float16,0,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,12,12,128,1,float16,fp8,0,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,12,12,128,1,fp8,fp8,0,0.029696000119050343
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,12,1,128,1,float16,float16,0,0.029359998802344005
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,12,1,128,1,float16,fp8,0,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,12,1,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,12,2,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,12,2,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,12,2,128,1,fp8,fp8,0,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,12,4,128,1,float16,fp8,0,0.03073599934577942
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,12,4,128,1,float16,float16,0,0.030720000465710957
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,12,4,128,1,fp8,fp8,0,0.028677334388097126
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,12,12,128,1,fp8,fp8,0,0.025600001215934753
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,12,1,128,1,float16,fp8,0,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,12,12,128,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,12,12,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,12,1,128,1,fp8,fp8,0,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,12,2,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,12,2,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,12,4,128,1,float16,float16,0,0.02697066714366277
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,12,2,128,1,float16,fp8,0,0.025941332181294758
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,12,4,128,1,float16,fp8,0,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,12,1,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,12,4,128,1,fp8,fp8,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,12,1,128,1,float16,float16,0,0.39798935254414874
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,12,1,128,1,fp8,fp8,0,0.4065279960632324
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,12,1,128,1,float16,fp8,0,0.39185599486033124
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,12,2,128,1,float16,float16,0,0.4075626532236735
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,12,2,128,1,fp8,fp8,0,0.4251306851704915
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,12,2,128,1,float16,fp8,0,0.4045120080312093
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,12,4,128,1,float16,float16,0,0.4154026508331299
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,12,4,128,1,float16,fp8,0,0.41095467408498126
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,12,1,128,1,float16,float16,0,0.20821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,12,1,128,1,float16,fp8,0,0.2076693375905355
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,12,1,128,1,fp8,fp8,0,0.21300800641377768
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,12,12,128,1,float16,float16,0,0.24305067459742227
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,12,4,128,1,fp8,fp8,0,0.42614932854970294
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,12,12,128,1,float16,fp8,0,0.23653866847356161
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,12,2,128,1,float16,float16,0,0.2129866679509481
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,12,12,128,1,fp8,fp8,0,0.24676267306009927
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,12,2,128,1,float16,fp8,0,0.21363733212153116
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,12,2,128,1,fp8,fp8,0,0.22220800320307413
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,12,4,128,1,float16,float16,0,0.21811199188232422
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,12,1,128,1,float16,float16,0,0.11572266618410747
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,12,1,128,1,float16,fp8,0,0.11571199695269267
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,12,4,128,1,float16,fp8,0,0.21404266357421875
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,12,12,128,1,float16,float16,0,0.13517866532007852
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,12,4,128,1,fp8,fp8,0,0.22388799985249838
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,12,12,128,1,float16,fp8,0,0.1307253340880076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,12,12,128,1,fp8,fp8,0,0.1346506675084432
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,12,1,128,1,fp8,fp8,0,0.11501866579055786
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,12,2,128,1,float16,float16,0,0.11946666240692139
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,12,2,128,1,float16,fp8,0,0.11844266454378764
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,12,2,128,1,fp8,fp8,0,0.12151466806729634
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,12,1,128,1,float16,float16,0,0.067930668592453
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,12,4,128,1,float16,float16,0,0.12016533811887105
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,12,4,128,1,float16,fp8,0,0.12151466806729634
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,12,12,128,1,float16,float16,0,0.07679999868075053
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,12,12,128,1,float16,fp8,0,0.07509333391984303
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,12,12,128,1,fp8,fp8,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,12,1,128,1,float16,fp8,0,0.06655466556549072
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,12,4,128,1,fp8,fp8,0,0.12458133697509766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,12,1,128,1,fp8,fp8,0,0.06451199948787689
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,12,2,128,1,float16,float16,0,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,12,2,128,1,float16,fp8,0,0.06724266707897186
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,12,2,128,1,fp8,fp8,0,0.06621866424878438
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,12,4,128,1,float16,float16,0,0.0682666649421056
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,12,4,128,1,float16,fp8,0,0.06962666908899943
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,12,4,128,1,fp8,fp8,0,0.067930668592453
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,12,1,128,1,float16,float16,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,12,12,128,1,float16,float16,0,0.04744000236193339
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,12,12,128,1,float16,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,12,1,128,1,float16,fp8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,12,12,128,1,fp8,fp8,0,0.0460746685663859
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,12,1,128,1,fp8,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,12,2,128,1,float16,float16,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,12,2,128,1,float16,fp8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,12,2,128,1,fp8,fp8,0,0.0433599998553594
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,12,4,128,1,float16,fp8,0,0.04710400104522705
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,12,4,128,1,fp8,fp8,0,0.04368533194065094
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,12,1,128,1,float16,fp8,0,0.02934933453798294
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,12,4,128,1,float16,float16,0,0.04539200166861216
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,12,12,128,1,fp8,fp8,0,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,12,12,128,1,float16,float16,0,0.03038399914900462
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,12,12,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,12,2,128,1,float16,float16,0,0.029696000119050343
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,12,2,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,12,4,128,1,float16,float16,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,12,1,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,12,4,128,1,fp8,fp8,0,0.029365333418051403
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,12,1,128,1,float16,float16,0,0.029696000119050343
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,12,4,128,1,float16,fp8,0,0.029701332251230877
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,12,2,128,1,fp8,fp8,0,0.027295999228954315
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,12,12,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,12,12,128,1,float16,fp8,0,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,12,12,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,12,1,128,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,12,1,128,1,float16,float16,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,12,2,128,1,float16,float16,0,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,12,2,128,1,fp8,fp8,0,0.023898666103680927
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,12,4,128,1,float16,fp8,0,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,12,4,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,12,4,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,12,2,128,1,float16,fp8,0,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,12,12,128,1,float16,fp8,0,0.023893333971500397
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,12,1,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,12,12,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,12,12,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,12,1,128,1,float16,float16,0,0.023205332458019257
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,12,1,128,1,float16,fp8,0,0.023552000522613525
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,12,1,128,1,fp8,fp8,0,0.021168000996112823
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,12,2,128,1,float16,fp8,0,0.023546665906906128
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,12,2,128,1,float16,float16,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,12,4,128,1,float16,fp8,0,0.023893333971500397
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,12,4,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,12,12,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,12,12,128,1,float16,fp8,0,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,12,4,128,1,float16,float16,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,12,12,128,1,fp8,fp8,0,0.021157334248224895
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,12,1,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,12,2,128,1,float16,fp8,0,0.023205332458019257
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,12,2,128,1,float16,float16,0,0.02399466683467229
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,12,2,128,1,fp8,fp8,0,0.021168000996112823
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,12,1,128,1,float16,float16,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,12,4,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,12,4,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,12,4,128,1,fp8,fp8,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,12,1,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,12,2,128,1,fp8,fp8,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,12,1,128,1,float16,float16,0,0.21709332863489786
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,12,1,128,1,float16,fp8,0,0.21606399615605673
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,12,1,128,1,fp8,fp8,0,0.22356800238291422
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,12,2,128,1,float16,fp8,0,0.2198186715443929
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,12,2,128,1,float16,float16,0,0.22254933913548788
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,12,2,128,1,fp8,fp8,0,0.2321066657702128
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,12,4,128,1,float16,float16,0,0.22459733486175537
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,12,4,128,1,float16,fp8,0,0.22323199113210043
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,12,1,128,1,float16,float16,0,0.11707733074824016
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,12,1,128,1,float16,fp8,0,0.11741866668065389
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,12,4,128,1,fp8,fp8,0,0.23517866929372153
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,12,12,128,1,float16,float16,0,0.13500266273816428
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,12,1,128,1,fp8,fp8,0,0.11943999926249187
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,12,12,128,1,float16,fp8,0,0.13192533453305563
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,12,12,128,1,fp8,fp8,0,0.13875200351079306
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,12,2,128,1,float16,fp8,0,0.11980799833933513
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,12,2,128,1,fp8,fp8,0,0.12493866682052612
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,12,2,128,1,float16,float16,0,0.11946133772532146
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,12,4,128,1,float16,float16,0,0.12185066938400269
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,12,4,128,1,float16,fp8,0,0.12117333213488261
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,12,12,128,1,float16,fp8,0,0.07509333391984303
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,12,12,128,1,float16,float16,0,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,12,4,128,1,fp8,fp8,0,0.12664000193277994
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,12,1,128,1,float16,float16,0,0.0682666649421056
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,12,1,128,1,fp8,fp8,0,0.06586666901906331
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,12,12,128,1,fp8,fp8,0,0.07987200220425923
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,12,2,128,1,float16,fp8,0,0.06963199873765309
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,12,2,128,1,float16,float16,0,0.0682666649421056
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,12,2,128,1,fp8,fp8,0,0.06758399804433186
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,12,1,128,1,float16,fp8,0,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,12,4,128,1,float16,float16,0,0.0699786643187205
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,12,4,128,1,float16,fp8,0,0.07065600156784058
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,12,1,128,1,float16,float16,0,0.041984001795450844
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,12,12,128,1,float16,fp8,0,0.04540266593297323
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,12,4,128,1,fp8,fp8,0,0.06926933427651723
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,12,1,128,1,fp8,fp8,0,0.04266666869322459
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,12,12,128,1,float16,float16,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,12,2,128,1,float16,float16,0,0.043354665239652
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,12,12,128,1,fp8,fp8,0,0.04471466441949209
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,12,2,128,1,float16,fp8,0,0.04265599946180979
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,12,1,128,1,float16,fp8,0,0.04164266586303711
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,12,2,128,1,fp8,fp8,0,0.04164800047874451
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,12,4,128,1,float16,float16,0,0.043007999658584595
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,12,4,128,1,float16,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,12,4,128,1,fp8,fp8,0,0.043007999658584595
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,12,12,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,12,1,128,1,float16,fp8,0,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,12,12,128,1,fp8,fp8,0,0.03107200066248576
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,12,12,128,1,float16,float16,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,12,1,128,1,fp8,fp8,0,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,12,2,128,1,float16,float16,0,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,12,4,128,1,float16,float16,0,0.031045332551002502
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,12,2,128,1,float16,fp8,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,12,2,128,1,fp8,fp8,0,0.02938133229811986
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,12,12,128,1,float16,float16,0,0.023215999205907185
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,12,4,128,1,float16,fp8,0,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,12,1,128,1,float16,float16,0,0.030720000465710957
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,12,12,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,12,4,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,12,12,128,1,fp8,fp8,0,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,12,1,128,1,fp8,fp8,0,0.021183999876181286
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,12,1,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,12,1,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,12,2,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,12,2,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,12,4,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,12,4,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,12,4,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,12,1,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,12,12,128,1,fp8,fp8,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,12,12,128,1,float16,fp8,0,0.019130667050679524
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,12,12,128,1,float16,float16,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,12,2,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,12,1,128,1,fp8,fp8,0,0.01874133323629697
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,12,2,128,1,float16,float16,0,0.018794666975736618
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,12,1,128,1,float16,fp8,0,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,12,2,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,12,2,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,12,4,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,12,12,128,1,float16,float16,0,0.017749333133300144
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,12,4,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,12,12,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,12,1,128,1,float16,float16,0,0.017055999487638474
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,12,12,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,12,1,128,1,float16,fp8,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,12,2,128,1,float16,float16,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,12,1,128,1,fp8,fp8,0,0.01670933390657107
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,12,2,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,12,2,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,12,4,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,12,4,128,1,float16,float16,0,0.017397332936525345
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,12,12,128,1,float16,float16,0,0.017071999609470367
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,12,4,128,1,fp8,fp8,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,12,12,128,1,fp8,fp8,0,0.01809599995613098
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,12,4,128,1,float16,fp8,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,12,12,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,12,1,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,12,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,12,2,128,1,float16,float16,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,12,2,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,12,2,128,1,float16,fp8,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,12,4,128,1,float16,fp8,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,12,4,128,1,float16,float16,0,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,12,4,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,12,1,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,12,1,128,1,fp8,fp8,0,0.15615466237068176
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,12,1,128,1,float16,fp8,0,0.14760532975196838
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,12,1,128,1,float16,float16,0,0.14816000064214072
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,12,2,128,1,float16,float16,0,0.15052266915639242
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,12,2,128,1,float16,fp8,0,0.14921067158381143
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,12,2,128,1,fp8,fp8,0,0.1629706621170044
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,12,4,128,1,float16,float16,0,0.15069866180419922
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,12,1,128,1,float16,float16,0,0.08192533254623413
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,12,1,128,1,float16,fp8,0,0.08089600006739299
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,12,12,128,1,float16,float16,0,0.09147199988365173
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,12,4,128,1,fp8,fp8,0,0.16503467162450156
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,12,4,128,1,float16,fp8,0,0.15172266960144043
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,12,12,128,1,float16,fp8,0,0.09079466263453166
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,12,12,128,1,fp8,fp8,0,0.09864532947540283
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,12,1,128,1,fp8,fp8,0,0.08430400490760803
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,12,2,128,1,float16,float16,0,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,12,2,128,1,fp8,fp8,0,0.08635733524958293
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,12,2,128,1,float16,fp8,0,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,12,4,128,1,float16,float16,0,0.08294400076071422
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,12,12,128,1,float16,float16,0,0.05156266689300537
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,12,12,128,1,float16,fp8,0,0.05156266689300537
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,12,4,128,1,fp8,fp8,0,0.09045333663622539
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,12,1,128,1,float16,float16,0,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,12,4,128,1,float16,fp8,0,0.08260799944400787
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,12,1,128,1,fp8,fp8,0,0.049829334020614624
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,12,1,128,1,float16,fp8,0,0.05017599960168203
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,12,2,128,1,float16,float16,0,0.05087999999523163
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,12,2,128,1,float16,fp8,0,0.05018133421738943
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,12,12,128,1,fp8,fp8,0,0.05359466870625814
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,12,2,128,1,fp8,fp8,0,0.05153599878152212
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,12,4,128,1,float16,float16,0,0.04984533290068308
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,12,4,128,1,fp8,fp8,0,0.051221330960591636
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,12,12,128,1,float16,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,12,12,128,1,fp8,fp8,0,0.03514666606982549
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,12,1,128,1,float16,fp8,0,0.0317493329445521
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,12,1,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,12,12,128,1,float16,float16,0,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,12,1,128,1,fp8,fp8,0,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,12,2,128,1,float16,float16,0,0.032074667513370514
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,12,4,128,1,float16,fp8,0,0.05151999990145365
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,12,2,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,12,2,128,1,float16,fp8,0,0.031744000812371574
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,12,4,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,12,4,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,12,12,128,1,float16,float16,0,0.026949333647886913
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,12,4,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,12,12,128,1,float16,fp8,0,0.025600001215934753
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,12,1,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,12,1,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,12,2,128,1,float16,fp8,0,0.025263999899228413
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,12,4,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,12,2,128,1,float16,float16,0,0.025621332228183746
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,12,2,128,1,fp8,fp8,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,12,12,128,1,float16,float16,0,0.017743999759356182
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,12,12,128,1,float16,fp8,0,0.019093333433071773
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,12,4,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,12,4,128,1,fp8,fp8,0,0.025274666647116344
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,12,12,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,12,1,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,12,12,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,12,1,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,12,1,128,1,fp8,fp8,0,0.018794666975736618
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,12,1,128,1,float16,float16,0,0.018432000031073887
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,12,2,128,1,float16,float16,0,0.017050666113694508
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,12,2,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,12,4,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,12,12,128,1,float16,float16,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,12,2,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,12,12,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,12,12,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,12,1,128,1,float16,float16,0,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,12,1,128,1,float16,fp8,0,0.016704000532627106
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,12,1,128,1,fp8,fp8,0,0.016714667280515034
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,12,4,128,1,float16,float16,0,0.017071999609470367
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,12,2,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,12,4,128,1,float16,fp8,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,12,4,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,12,4,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,12,2,128,1,float16,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,12,4,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,12,2,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,12,1,128,1,float16,float16,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,12,1,128,1,float16,fp8,0,0.016735999534527462
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,12,12,128,1,float16,fp8,0,0.016741332908471424
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,12,1,128,1,fp8,fp8,0,0.016741332908471424
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,12,2,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,12,2,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,12,4,128,1,float16,float16,0,0.016714667280515034
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,12,2,128,1,float16,fp8,0,0.017749333133300144
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,12,12,128,1,float16,float16,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,12,4,128,1,fp8,fp8,0,0.016714667280515034
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,12,12,128,1,float16,float16,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,12,12,128,1,float16,fp8,0,0.01674666628241539
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,12,4,128,1,float16,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,12,12,128,1,fp8,fp8,0,0.016773333152135212
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,12,12,128,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,12,2,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,12,1,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,12,1,128,1,fp8,fp8,0,0.016042667130629223
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,12,4,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,12,2,128,1,float16,float16,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,12,2,128,1,fp8,fp8,0,0.016714667280515034
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,12,4,128,1,fp8,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,12,1,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,12,4,128,1,float16,float16,0,0.016714667280515034
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,12,1,128,1,fp8,fp8,0,0.12732266386349997
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,12,1,128,1,float16,float16,0,0.11674132943153381
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,12,1,128,1,float16,fp8,0,0.11741866668065389
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,12,2,128,1,float16,float16,0,0.1181013286113739
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,12,2,128,1,float16,fp8,0,0.11843732992808025
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,12,2,128,1,fp8,fp8,0,0.12868799765904745
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,12,1,128,1,float16,fp8,0,0.06791999936103821
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,12,1,128,1,float16,float16,0,0.0682666649421056
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,12,4,128,1,float16,fp8,0,0.11844266454378764
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,12,4,128,1,fp8,fp8,0,0.13158933321634927
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,12,4,128,1,float16,float16,0,0.12117866675059001
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,12,12,128,1,float16,fp8,0,0.07031466563542683
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,12,12,128,1,float16,float16,0,0.0682666649421056
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,12,12,128,1,fp8,fp8,0,0.07611733178297679
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,12,1,128,1,fp8,fp8,0,0.07169066866238911
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,12,2,128,1,float16,float16,0,0.068271999557813
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,12,2,128,1,fp8,fp8,0,0.07236800094445546
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,12,2,128,1,float16,fp8,0,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,12,4,128,1,float16,fp8,0,0.0682666649421056
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,12,4,128,1,fp8,fp8,0,0.0730453332265218
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,12,12,128,1,float16,fp8,0,0.04232533276081085
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,12,12,128,1,float16,float16,0,0.04164266586303711
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,12,1,128,1,float16,float16,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,12,12,128,1,fp8,fp8,0,0.04574933151404063
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,12,1,128,1,float16,fp8,0,0.04164266586303711
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,12,4,128,1,float16,float16,0,0.0682666649421056
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,12,2,128,1,float16,fp8,0,0.04164800047874451
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,12,1,128,1,fp8,fp8,0,0.04368533194065094
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,12,4,128,1,fp8,fp8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,12,4,128,1,float16,float16,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,12,4,128,1,float16,fp8,0,0.04301333427429199
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,12,2,128,1,fp8,fp8,0,0.04369066655635834
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,12,2,128,1,float16,float16,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,12,12,128,1,float16,fp8,0,0.030720000465710957
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,12,1,128,1,float16,float16,0,0.02902399996916453
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,12,1,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,12,12,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,12,2,128,1,float16,float16,0,0.027647999425729115
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,12,1,128,1,fp8,fp8,0,0.029696000119050343
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,12,12,128,1,float16,float16,0,0.029696000119050343
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,12,2,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,12,2,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,12,4,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,12,4,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,12,12,128,1,float16,float16,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,12,4,128,1,fp8,fp8,0,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,12,1,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,12,12,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,12,1,128,1,fp8,fp8,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,12,1,128,1,float16,float16,0,0.02184533327817917
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,12,2,128,1,float16,fp8,0,0.021829334398110706
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,12,2,128,1,fp8,fp8,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,12,4,128,1,float16,float16,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,12,2,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,12,12,128,1,fp8,fp8,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,12,4,128,1,fp8,fp8,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,12,4,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,12,12,128,1,float16,fp8,0,0.018432000031073887
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,12,12,128,1,fp8,fp8,0,0.016693333784739178
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,12,12,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,12,1,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,12,2,128,1,float16,float16,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,12,1,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,12,2,128,1,fp8,fp8,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,12,4,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,12,2,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,12,4,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,12,1,128,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,12,4,128,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,12,12,128,1,float16,float16,0,0.016735999534527462
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,12,12,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,12,12,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,12,1,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,12,1,128,1,fp8,fp8,0,0.016751999656359356
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,12,1,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,12,2,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,12,2,128,1,fp8,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,12,4,128,1,float16,float16,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,12,2,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,12,4,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,12,12,128,1,float16,float16,0,0.016741332908471424
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,12,4,128,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,12,12,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,12,12,128,1,float16,fp8,0,0.015696000307798386
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,12,1,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,12,2,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,12,4,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,12,1,128,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,12,2,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,12,2,128,1,float16,float16,0,0.014997333288192749
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,12,4,128,1,float16,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,12,4,128,1,fp8,fp8,0,0.016741332908471424
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,12,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,12,12,128,1,float16,fp8,0,0.01570133368174235
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,12,12,128,1,float16,float16,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,12,1,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,12,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,12,2,128,1,float16,float16,0,0.016714667280515034
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,12,1,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,12,12,128,1,fp8,fp8,0,0.01674666628241539
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,12,2,128,1,fp8,fp8,0,0.014991999914248785
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,12,4,128,1,float16,fp8,0,0.016037333756685257
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,12,4,128,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,12,4,128,1,float16,float16,0,0.016704000532627106
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,12,2,128,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,12,1,128,1,float16,float16,0,0.10479467113812764
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,12,1,128,1,float16,fp8,0,0.10513066252072652
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,12,1,128,1,fp8,fp8,0,0.11502933502197266
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,12,2,128,1,float16,float16,0,0.10479467113812764
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,12,2,128,1,float16,fp8,0,0.10479467113812764
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,12,2,128,1,fp8,fp8,0,0.1143839955329895
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,12,4,128,1,float16,float16,0,0.10478400190671285
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,12,4,128,1,fp8,fp8,0,0.11502933502197266
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,12,12,128,1,float16,float16,0,0.061103999614715576
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,12,4,128,1,float16,fp8,0,0.10512533783912659
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,12,12,128,1,float16,fp8,0,0.06178666651248932
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,12,1,128,1,float16,float16,0,0.06075733403364817
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,12,12,128,1,fp8,fp8,0,0.06587199866771698
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,12,1,128,1,float16,fp8,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,12,1,128,1,fp8,fp8,0,0.06486399968465169
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,12,2,128,1,float16,fp8,0,0.06006399790445963
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,12,2,128,1,float16,float16,0,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,12,4,128,1,float16,fp8,0,0.06178666651248932
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,12,4,128,1,float16,float16,0,0.06041066845258077
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,12,4,128,1,fp8,fp8,0,0.06621866424878438
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,12,2,128,1,fp8,fp8,0,0.0645066648721695
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,12,1,128,1,float16,fp8,0,0.03788800040880839
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,12,12,128,1,fp8,fp8,0,0.041637333730856575
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,12,12,128,1,float16,float16,0,0.03922666609287262
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,12,1,128,1,float16,float16,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,12,2,128,1,float16,float16,0,0.039247999588648476
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,12,1,128,1,fp8,fp8,0,0.041296000281969704
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,12,12,128,1,float16,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,12,2,128,1,fp8,fp8,0,0.03993066648642222
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,12,4,128,1,float16,float16,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,12,12,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,12,4,128,1,float16,fp8,0,0.03925866633653641
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,12,4,128,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,12,12,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,12,2,128,1,float16,fp8,0,0.03822933385769526
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,12,12,128,1,float16,fp8,0,0.02697066714366277
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,12,1,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,12,1,128,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,12,1,128,1,fp8,fp8,0,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,12,2,128,1,float16,fp8,0,0.025253333151340485
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,12,2,128,1,float16,float16,0,0.025605333348115284
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,12,2,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,12,4,128,1,fp8,fp8,0,0.025936000049114227
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,12,12,128,1,float16,float16,0,0.021194666624069214
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,12,12,128,1,float16,fp8,0,0.021151999632517498
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,12,4,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,12,12,128,1,fp8,fp8,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,12,4,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,12,1,128,1,float16,float16,0,0.020138667275508244
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,12,1,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,12,2,128,1,float16,float16,0,0.020479999482631683
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,12,2,128,1,float16,fp8,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,12,1,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,12,4,128,1,float16,float16,0,0.0207893339296182
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,12,2,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,12,4,128,1,fp8,fp8,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,12,12,128,1,float16,float16,0,0.01674666628241539
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,12,12,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,12,12,128,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,12,1,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,12,4,128,1,float16,fp8,0,0.021168000996112823
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,12,1,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,12,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,12,2,128,1,float16,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,12,2,128,1,fp8,fp8,0,0.017743999759356182
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,12,4,128,1,float16,float16,0,0.016714667280515034
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,12,4,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,12,2,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,12,12,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,12,12,128,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,12,1,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,12,12,128,1,float16,float16,0,0.016042667130629223
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,12,4,128,1,fp8,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,12,1,128,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,12,1,128,1,float16,fp8,0,0.016389333953460056
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,12,2,128,1,float16,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,12,2,128,1,float16,float16,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,12,2,128,1,fp8,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,12,4,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,12,4,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,12,12,128,1,float16,float16,0,0.01570133368174235
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,12,12,128,1,float16,fp8,0,0.015354666858911514
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,12,12,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,12,1,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,12,4,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,12,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,12,1,128,1,float16,float16,0,0.016714667280515034
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,12,2,128,1,float16,fp8,0,0.01669866715868314
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,12,2,128,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,12,2,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,12,4,128,1,float16,float16,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,12,4,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,12,12,128,1,float16,float16,0,0.01570133368174235
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,12,12,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,12,12,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,12,1,128,1,float16,float16,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,12,1,128,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,12,1,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,12,4,128,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,12,2,128,1,float16,float16,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,12,2,128,1,float16,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,12,2,128,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,12,4,128,1,float16,float16,0,0.01674666628241539
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,12,4,128,1,float16,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,12,4,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,8,1,128,1,float16,fp8,0,3.7845493952433267
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,8,1,128,1,float16,float16,0,3.8125387827555337
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,8,1,128,1,fp8,fp8,0,3.100501378377279
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,8,2,128,1,float16,float16,0,3.621898651123047
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,8,2,128,1,fp8,fp8,0,3.1155198415120444
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,8,2,128,1,float16,fp8,0,3.7120161056518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,8,4,128,1,float16,float16,0,3.709797223409017
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,8,4,128,1,float16,fp8,0,3.6956373850504556
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,8,1,128,1,float16,float16,0,1.875285307566325
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,8,1,128,1,float16,fp8,0,1.87391996383667
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,8,1,128,1,fp8,fp8,0,1.6372052828470867
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,8,8,128,1,float16,float16,0,1.914730707804362
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,8,8,128,1,float16,fp8,0,1.9512373606363933
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,8,2,128,1,float16,float16,0,1.8752907117207844
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,8,8,128,1,fp8,fp8,0,1.6834665934244792
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,8,2,128,1,float16,fp8,0,1.8867200215657551
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,8,2,128,1,fp8,fp8,0,1.64301331837972
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,8,4,128,1,fp8,fp8,0,3.1469173431396484
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,8,1,128,1,float16,float16,0,1.013594627380371
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,8,8,128,1,float16,float16,0,1.0385066668192546
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,8,1,128,1,float16,fp8,0,1.027232011159261
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,8,8,128,1,float16,fp8,0,1.0480639934539795
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,8,4,128,1,float16,float16,0,1.8973013559977214
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,8,4,128,1,fp8,fp8,0,1.6542720794677734
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,8,4,128,1,float16,fp8,0,1.8942346572875977
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,8,8,128,1,fp8,fp8,0,0.9275733629862467
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,8,1,128,1,fp8,fp8,0,0.9048799673716227
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,8,2,128,1,float16,float16,0,1.018725315729777
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,8,2,128,1,fp8,fp8,0,0.9069120089213053
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,8,2,128,1,float16,fp8,0,1.0330453713734944
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,8,4,128,1,float16,float16,0,1.029637336730957
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,8,8,128,1,float16,float16,0,0.6048479874928793
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,8,4,128,1,float16,fp8,0,1.0398720105489094
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,8,4,128,1,fp8,fp8,0,0.9159680207570394
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,8,8,128,1,float16,fp8,0,0.6096213261286417
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,8,1,128,1,float16,float16,0,0.5867520173390707
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,8,1,128,1,float16,fp8,0,0.5925546487172445
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,8,8,128,1,fp8,fp8,0,0.5533013343811035
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,8,1,128,1,fp8,fp8,0,0.5341920057932535
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,8,2,128,1,float16,float16,0,0.5884586572647095
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,8,2,128,1,float16,fp8,0,0.5942666530609131
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,8,2,128,1,fp8,fp8,0,0.5416959921518961
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,8,4,128,1,float16,float16,0,0.5959626833597819
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,8,4,128,1,float16,fp8,0,0.5973333517710367
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,8,4,128,1,fp8,fp8,0,0.5461386839548746
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,8,1,128,1,float16,float16,0,2.1428960164388022
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,8,1,128,1,fp8,fp8,0,1.8814293543497722
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,8,1,128,1,float16,fp8,0,2.159279982248942
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,8,2,128,1,float16,float16,0,2.1831679344177246
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,8,2,128,1,float16,fp8,0,2.158949375152588
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,8,2,128,1,fp8,fp8,0,1.8956000010172527
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,8,4,128,1,float16,float16,0,2.183157285054525
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,8,1,128,1,float16,float16,0,1.130677302678426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,8,4,128,1,float16,fp8,0,2.217301368713379
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,8,1,128,1,float16,fp8,0,1.1337440013885498
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,8,8,128,1,float16,float16,0,1.1726293563842773
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,8,8,128,1,float16,fp8,0,1.18339737256368
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,8,8,128,1,fp8,fp8,0,1.0436320304870605
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,8,1,128,1,fp8,fp8,0,1.0077866713205974
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,8,4,128,1,fp8,fp8,0,1.9136959711710613
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,8,2,128,1,float16,float16,0,1.1385173002878826
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,8,2,128,1,float16,fp8,0,1.1579733689626057
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,8,2,128,1,fp8,fp8,0,1.0149333477020264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,8,1,128,1,float16,float16,0,0.6302719910939535
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,8,8,128,1,float16,float16,0,0.6493866840998331
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,8,1,128,1,float16,fp8,0,0.625493327776591
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,8,4,128,1,float16,float16,0,1.1497759819030762
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,8,4,128,1,float16,fp8,0,1.156266689300537
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,8,8,128,1,float16,fp8,0,0.6533120075861613
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,8,4,128,1,fp8,fp8,0,1.0204213460286458
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,8,8,128,1,fp8,fp8,0,0.590170661608378
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,8,1,128,1,fp8,fp8,0,0.571733315785726
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,8,2,128,1,float16,float16,0,0.6312959988911947
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,8,2,128,1,float16,fp8,0,0.6319839954376221
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,8,2,128,1,fp8,fp8,0,0.5714026689529419
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,8,8,128,1,float16,float16,0,0.38758933544158936
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,8,4,128,1,float16,float16,0,0.6381226778030396
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,8,4,128,1,fp8,fp8,0,0.5775306622187296
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,8,4,128,1,float16,fp8,0,0.6405119895935059
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,8,1,128,1,float16,fp8,0,0.37666134039560956
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,8,1,128,1,float16,float16,0,0.37700267632802326
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,8,8,128,1,float16,fp8,0,0.39150933424631756
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,8,1,128,1,fp8,fp8,0,0.3466240167617798
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,8,8,128,1,fp8,fp8,0,0.359935998916626
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,8,2,128,1,float16,float16,0,0.375983993212382
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,8,2,128,1,float16,fp8,0,0.37836798032124835
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,8,2,128,1,fp8,fp8,0,0.3476533492406209
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,8,4,128,1,float16,fp8,0,0.3810986677805583
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,8,4,128,1,float16,float16,0,0.37905065218607586
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,8,4,128,1,fp8,fp8,0,0.3534506559371948
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,8,1,128,1,float16,fp8,0,1.5670347213745117
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,8,1,128,1,float16,float16,0,1.5609173774719238
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,8,1,128,1,fp8,fp8,0,1.3834239641825359
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,8,2,128,1,float16,float16,0,1.588058630625407
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,8,2,128,1,float16,fp8,0,1.5723412831624348
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,8,2,128,1,fp8,fp8,0,1.3919679323832195
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,8,4,128,1,float16,float16,0,1.592149257659912
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,8,1,128,1,float16,float16,0,0.8299573262532552
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,8,4,128,1,float16,fp8,0,1.613653341929118
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,8,1,128,1,float16,fp8,0,0.8314932982126871
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,8,8,128,1,float16,fp8,0,0.8721066315968832
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,8,8,128,1,float16,float16,0,0.8618666330973307
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,8,8,128,1,fp8,fp8,0,0.7799200216929117
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,8,1,128,1,fp8,fp8,0,0.7475252946217855
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,8,4,128,1,fp8,fp8,0,1.4066346486409504
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,8,2,128,1,float16,float16,0,0.835909366607666
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,8,2,128,1,float16,fp8,0,0.8471893469492594
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,8,2,128,1,fp8,fp8,0,0.7519573370615641
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,8,4,128,1,float16,float16,0,0.8478720188140869
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,8,8,128,1,float16,float16,0,0.4852000077565511
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,8,1,128,1,float16,float16,0,0.4660960038503011
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,8,4,128,1,float16,fp8,0,0.8485386371612549
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,8,4,128,1,fp8,fp8,0,0.7608266671498617
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,8,8,128,1,float16,fp8,0,0.48930132389068604
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,8,1,128,1,float16,fp8,0,0.4657280047734578
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,8,8,128,1,fp8,fp8,0,0.44697598616282147
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,8,1,128,1,fp8,fp8,0,0.4299039840698242
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,8,2,128,1,float16,float16,0,0.46779731909434
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,8,2,128,1,fp8,fp8,0,0.4336640040079753
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,8,2,128,1,float16,fp8,0,0.47325865427652997
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,8,8,128,1,float16,float16,0,0.3022453387578328
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,8,4,128,1,float16,float16,0,0.4780373175938924
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,8,4,128,1,float16,fp8,0,0.4787199894587199
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,8,4,128,1,fp8,fp8,0,0.43878400325775146
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,8,1,128,1,float16,float16,0,0.29576534032821655
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,8,1,128,1,fp8,fp8,0,0.274944007396698
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,8,1,128,1,float16,fp8,0,0.2964479923248291
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,8,8,128,1,float16,fp8,0,0.3059999942779541
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,8,8,128,1,fp8,fp8,0,0.28484266996383667
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,8,2,128,1,float16,float16,0,0.2943999965985616
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,8,2,128,1,fp8,fp8,0,0.27391467491785687
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,8,2,128,1,float16,fp8,0,0.2984960079193115
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,8,4,128,1,float16,float16,0,0.2984960079193115
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,8,4,128,1,float16,fp8,0,0.2991786599159241
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,8,4,128,1,fp8,fp8,0,0.2762986620267232
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,8,1,128,1,float16,fp8,0,2.0742719968159995
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,8,1,128,1,float16,float16,0,2.0742719968159995
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,8,1,128,1,fp8,fp8,0,1.8300639788309734
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,8,2,128,1,float16,fp8,0,2.08622407913208
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,8,2,128,1,float16,float16,0,2.0834827423095703
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,8,2,128,1,fp8,fp8,0,1.8493439356486003
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,8,4,128,1,float16,float16,0,2.098858674367269
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,8,4,128,1,float16,fp8,0,2.14084259668986
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,8,1,128,1,float16,float16,0,1.0705866813659668
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,8,1,128,1,float16,fp8,0,1.0695733229319255
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,8,8,128,1,float16,float16,0,1.122479995091756
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,8,8,128,1,float16,fp8,0,1.1385119756062825
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,8,1,128,1,fp8,fp8,0,0.9620693524678549
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,8,8,128,1,fp8,fp8,0,1.0091573397318523
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,8,2,128,1,float16,float16,0,1.0927786827087402
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,8,2,128,1,float16,fp8,0,1.0801546573638916
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,8,4,128,1,fp8,fp8,0,1.8674346605936687
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,8,2,128,1,fp8,fp8,0,0.9681866963704427
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,8,1,128,1,float16,float16,0,0.5748053391774496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,8,1,128,1,float16,fp8,0,0.5758293469746908
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,8,8,128,1,float16,float16,0,0.6038240194320679
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,8,4,128,1,float16,float16,0,1.093125343322754
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,8,4,128,1,float16,fp8,0,1.105738639831543
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,8,8,128,1,fp8,fp8,0,0.5529599984486898
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,8,4,128,1,fp8,fp8,0,0.9804746309916178
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,8,8,128,1,float16,fp8,0,0.6150879859924316
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,8,1,128,1,fp8,fp8,0,0.5249760150909424
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,8,2,128,1,float16,float16,0,0.5802719990412394
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,8,2,128,1,float16,fp8,0,0.5840160051981608
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,8,2,128,1,fp8,fp8,0,0.5314186811447144
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,8,4,128,1,float16,float16,0,0.5908639828364054
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,8,4,128,1,float16,fp8,0,0.5915360053380331
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,8,8,128,1,float16,fp8,0,0.35140268007914227
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,8,8,128,1,float16,float16,0,0.3500373363494873
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,8,4,128,1,fp8,fp8,0,0.5365813175837199
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,8,1,128,1,float16,fp8,0,0.3309333324432373
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,8,1,128,1,float16,float16,0,0.3322880069414775
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,8,8,128,1,fp8,fp8,0,0.32477333148320514
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,8,1,128,1,fp8,fp8,0,0.306005338827769
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,8,2,128,1,float16,float16,0,0.3350133498509725
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,8,2,128,1,float16,fp8,0,0.33399466673533124
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,8,2,128,1,fp8,fp8,0,0.3114666740099589
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,8,4,128,1,float16,float16,0,0.34012798468271893
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,8,4,128,1,float16,fp8,0,0.34115731716156006
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,8,4,128,1,fp8,fp8,0,0.3176106611887614
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,8,8,128,1,float16,float16,0,0.2218666672706604
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,8,8,128,1,float16,fp8,0,0.22528000672658285
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,8,1,128,1,float16,float16,0,0.21708800395329794
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,8,8,128,1,fp8,fp8,0,0.20923733711242676
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,8,1,128,1,float16,fp8,0,0.21880000829696655
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,8,2,128,1,float16,float16,0,0.21778132518132529
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,8,2,128,1,float16,fp8,0,0.21913599967956543
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,8,4,128,1,float16,fp8,0,0.22152533133824667
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,8,2,128,1,fp8,fp8,0,0.20291733741760254
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,8,1,128,1,fp8,fp8,0,0.203274667263031
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,8,4,128,1,float16,float16,0,0.21982399622599283
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,8,4,128,1,fp8,fp8,0,0.20377600193023682
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,8,1,128,1,float16,float16,0,1.2972373167673747
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,8,1,128,1,float16,fp8,0,1.2825600306193035
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,8,1,128,1,fp8,fp8,0,1.1521706581115723
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,8,2,128,1,float16,fp8,0,1.2900746663411458
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,8,2,128,1,float16,float16,0,1.2934772968292236
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,8,2,128,1,fp8,fp8,0,1.1689279874165852
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,8,4,128,1,float16,float16,0,1.31822935740153
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,8,4,128,1,float16,fp8,0,1.3120853106180828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,8,1,128,1,float16,float16,0,0.6713973681131998
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,8,1,128,1,float16,fp8,0,0.6686720053354899
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,8,8,128,1,float16,float16,0,0.7096426486968994
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,8,8,128,1,float16,fp8,0,0.7144052982330322
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,8,1,128,1,fp8,fp8,0,0.6143999894460043
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,8,2,128,1,float16,float16,0,0.6775466601053873
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,8,8,128,1,fp8,fp8,0,0.652458667755127
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,8,4,128,1,fp8,fp8,0,1.1898880004882812
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,8,2,128,1,float16,fp8,0,0.6799413363138834
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,8,2,128,1,fp8,fp8,0,0.618837316830953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,8,1,128,1,float16,float16,0,0.3667626778284709
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,8,4,128,1,float16,float16,0,0.6877866586049398
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,8,4,128,1,float16,fp8,0,0.688810666402181
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,8,8,128,1,float16,float16,0,0.38980265458424884
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,8,8,128,1,float16,fp8,0,0.39424534638722736
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,8,4,128,1,fp8,fp8,0,0.6285653511683146
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,8,8,128,1,fp8,fp8,0,0.3643786509831746
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,8,1,128,1,float16,fp8,0,0.36744534969329834
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,8,1,128,1,fp8,fp8,0,0.3428693215052287
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,8,2,128,1,float16,float16,0,0.3729066848754883
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,8,2,128,1,float16,fp8,0,0.37461332480112713
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,8,2,128,1,fp8,fp8,0,0.3466293414433797
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,8,4,128,1,float16,float16,0,0.37836798032124835
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,8,4,128,1,float16,fp8,0,0.38281067212422687
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,8,8,128,1,float16,float16,0,0.23244800170262656
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,8,4,128,1,fp8,fp8,0,0.3534506559371948
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,8,1,128,1,float16,float16,0,0.2218666672706604
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,8,8,128,1,float16,fp8,0,0.23381332556406656
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,8,1,128,1,float16,fp8,0,0.22186134258906046
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,8,8,128,1,fp8,fp8,0,0.2177706758181254
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,8,1,128,1,fp8,fp8,0,0.20325332880020142
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,8,2,128,1,float16,float16,0,0.2218666672706604
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,8,2,128,1,float16,fp8,0,0.22291199366251627
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,8,2,128,1,fp8,fp8,0,0.20667733748753866
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,8,1,128,1,float16,float16,0,0.14967466394106546
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,8,4,128,1,float16,float16,0,0.2249386707941691
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,8,8,128,1,float16,float16,0,0.14984533190727234
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,8,4,128,1,float16,fp8,0,0.22596800327301025
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,8,4,128,1,fp8,fp8,0,0.21230934063593546
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,8,8,128,1,float16,fp8,0,0.1532586713631948
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,8,8,128,1,fp8,fp8,0,0.14148267110188803
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,8,1,128,1,fp8,fp8,0,0.13979199528694153
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,8,1,128,1,float16,fp8,0,0.1493280033270518
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,8,2,128,1,float16,fp8,0,0.14913066228230795
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,8,2,128,1,float16,float16,0,0.15035733580589294
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,8,4,128,1,fp8,fp8,0,0.14148799578348795
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,8,2,128,1,fp8,fp8,0,0.13909332950909933
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,8,4,128,1,float16,fp8,0,0.15189866224924722
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,8,4,128,1,float16,float16,0,0.1508693297704061
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,8,1,128,1,fp8,fp8,0,1.2298239866892497
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,8,1,128,1,float16,float16,0,1.318933327992757
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,8,1,128,1,float16,fp8,0,1.3421227137247722
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,8,2,128,1,float16,float16,0,1.3417812983194988
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,8,2,128,1,float16,fp8,0,1.3513439496358235
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,8,2,128,1,fp8,fp8,0,1.242794672648112
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,8,4,128,1,float16,fp8,0,1.3619093894958496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,8,4,128,1,float16,float16,0,1.3550987243652344
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,8,1,128,1,float16,float16,0,0.6860799789428711
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,8,1,128,1,float16,fp8,0,0.6830080350240072
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,8,1,128,1,fp8,fp8,0,0.6364213228225708
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,8,2,128,1,float16,float16,0,0.6864266395568848
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,8,8,128,1,float16,fp8,0,0.7417173385620117
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,8,8,128,1,float16,float16,0,0.7273813088734945
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,8,8,128,1,fp8,fp8,0,0.6850559711456299
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,8,2,128,1,fp8,fp8,0,0.6432426770528158
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,8,2,128,1,float16,fp8,0,0.6949493090311686
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,8,4,128,1,fp8,fp8,0,1.2661866346995037
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,8,1,128,1,float16,float16,0,0.36607468128204346
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,8,1,128,1,float16,fp8,0,0.36881065368652344
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,8,8,128,1,float16,float16,0,0.3894666830698649
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,8,4,128,1,float16,float16,0,0.6976853211720785
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,8,8,128,1,float16,fp8,0,0.39628799756368
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,8,8,128,1,fp8,fp8,0,0.37185601393381756
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,8,4,128,1,float16,fp8,0,0.7021066347757975
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,8,4,128,1,fp8,fp8,0,0.659114678700765
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,8,1,128,1,fp8,fp8,0,0.3452586730321248
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,8,2,128,1,float16,float16,0,0.36983466148376465
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,8,2,128,1,float16,fp8,0,0.3708639939626058
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,8,2,128,1,fp8,fp8,0,0.34867199261983234
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,8,8,128,1,float16,float16,0,0.22664533058802286
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,8,4,128,1,float16,float16,0,0.37666134039560956
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,8,8,128,1,float16,fp8,0,0.227674663066864
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,8,4,128,1,float16,fp8,0,0.37939198811848956
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,8,4,128,1,fp8,fp8,0,0.35549867153167725
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,8,8,128,1,fp8,fp8,0,0.21435733636220297
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,8,1,128,1,float16,fp8,0,0.20957867304484049
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,8,1,128,1,float16,float16,0,0.20784000555674234
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,8,2,128,1,float16,float16,0,0.21230934063593546
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,8,2,128,1,fp8,fp8,0,0.20087466637293497
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,8,2,128,1,float16,fp8,0,0.2129813234011332
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,8,1,128,1,fp8,fp8,0,0.19643733898798624
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,8,4,128,1,float16,float16,0,0.21811199188232422
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,8,4,128,1,float16,fp8,0,0.21777600049972534
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,8,1,128,1,float16,float16,0,0.13499200344085693
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,8,4,128,1,fp8,fp8,0,0.2065066695213318
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,8,1,128,1,fp8,fp8,0,0.1256106694539388
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,8,8,128,1,float16,fp8,0,0.1402666668097178
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,8,8,128,1,float16,float16,0,0.13960533340771994
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,8,8,128,1,fp8,fp8,0,0.13448533415794373
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,8,1,128,1,float16,fp8,0,0.13516799608866373
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,8,2,128,1,float16,float16,0,0.1346560021241506
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,8,2,128,1,float16,fp8,0,0.1360160013039907
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,8,4,128,1,float16,float16,0,0.13567999998728433
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,8,2,128,1,fp8,fp8,0,0.12526933352152506
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,8,4,128,1,float16,fp8,0,0.1358506679534912
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,8,8,128,1,float16,float16,0,0.08839999636014302
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,8,8,128,1,float16,fp8,0,0.0904373327891032
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,8,4,128,1,fp8,fp8,0,0.12595199545224509
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,8,8,128,1,fp8,fp8,0,0.08635733524958293
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,8,1,128,1,float16,fp8,0,0.08806399504343669
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,8,1,128,1,float16,float16,0,0.08737599849700928
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,8,2,128,1,float16,float16,0,0.08772266904513042
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,8,2,128,1,float16,fp8,0,0.08840533097585042
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,8,1,128,1,fp8,fp8,0,0.08397333820660909
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,8,2,128,1,fp8,fp8,0,0.08431466420491536
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,8,4,128,1,fp8,fp8,0,0.08498666683832805
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,8,4,128,1,float16,fp8,0,0.0897759993871053
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,8,4,128,1,float16,float16,0,0.08703999718030293
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,8,1,128,1,fp8,fp8,0,0.8282506465911865
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,8,1,128,1,float16,float16,0,0.8717652956644694
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,8,1,128,1,float16,fp8,0,0.8676640192667643
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,8,2,128,1,float16,float16,0,0.8802986939748129
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,8,2,128,1,float16,fp8,0,0.8915627002716064
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,8,2,128,1,fp8,fp8,0,0.8379680315653483
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,8,4,128,1,float16,float16,0,0.8973600069681803
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,8,4,128,1,float16,fp8,0,0.8994133472442627
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,8,1,128,1,float16,fp8,0,0.45448533693949383
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,8,1,128,1,float16,float16,0,0.4599466721216838
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,8,8,128,1,float16,float16,0,0.4896426598230998
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,8,1,128,1,fp8,fp8,0,0.4333333174387614
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,8,8,128,1,fp8,fp8,0,0.47018667062123615
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,8,8,128,1,float16,fp8,0,0.49272000789642334
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,8,4,128,1,fp8,fp8,0,0.8574293454488119
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,8,2,128,1,float16,fp8,0,0.460970679918925
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,8,2,128,1,float16,float16,0,0.4589279890060425
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,8,2,128,1,fp8,fp8,0,0.44492801030476886
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,8,1,128,1,float16,float16,0,0.2491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,8,1,128,1,float16,fp8,0,0.24916799863179526
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,8,8,128,1,float16,float16,0,0.2677759925524394
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,8,4,128,1,float16,float16,0,0.4705280065536499
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,8,4,128,1,float16,fp8,0,0.4701546827952067
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,8,4,128,1,fp8,fp8,0,0.4483413298924764
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,8,8,128,1,fp8,fp8,0,0.25702399015426636
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,8,8,128,1,float16,fp8,0,0.2725600004196167
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,8,1,128,1,fp8,fp8,0,0.23894933859507242
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,8,2,128,1,float16,float16,0,0.2542666594187419
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,8,2,128,1,float16,fp8,0,0.25466134150822956
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,8,2,128,1,fp8,fp8,0,0.24199465910593668
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,8,8,128,1,float16,float16,0,0.15598400433858237
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,8,4,128,1,float16,float16,0,0.25602134068806964
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,8,4,128,1,float16,fp8,0,0.2606026728947957
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,8,4,128,1,fp8,fp8,0,0.24576000372568765
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,8,8,128,1,float16,fp8,0,0.1570133368174235
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,8,1,128,1,float16,float16,0,0.1455839971701304
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,8,1,128,1,float16,fp8,0,0.14437333742777506
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,8,1,128,1,fp8,fp8,0,0.1341493328412374
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,8,8,128,1,fp8,fp8,0,0.15191466609636942
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,8,2,128,1,float16,float16,0,0.14389866590499878
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,8,2,128,1,float16,fp8,0,0.14677332838376364
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,8,2,128,1,fp8,fp8,0,0.1367039978504181
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,8,4,128,1,float16,float16,0,0.14643733700116476
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,8,4,128,1,float16,fp8,0,0.14916266997655234
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,8,1,128,1,float16,float16,0,0.09489066402117412
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,8,1,128,1,fp8,fp8,0,0.08840533097585042
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,8,1,128,1,float16,fp8,0,0.09489066402117412
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,8,4,128,1,fp8,fp8,0,0.14455466469128928
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,8,8,128,1,float16,float16,0,0.096261332432429
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,8,8,128,1,fp8,fp8,0,0.09284800291061401
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,8,8,128,1,float16,fp8,0,0.09761599699656169
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,8,2,128,1,float16,float16,0,0.09452799956003825
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,8,2,128,1,float16,fp8,0,0.09489599863688152
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,8,2,128,1,fp8,fp8,0,0.08874666690826416
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,8,8,128,1,float16,float16,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,8,4,128,1,float16,float16,0,0.0962506632010142
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,8,4,128,1,float16,fp8,0,0.09522666533788045
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,8,1,128,1,float16,float16,0,0.07034666836261749
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,8,8,128,1,fp8,fp8,0,0.0682666649421056
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,8,1,128,1,float16,fp8,0,0.07167466481526692
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,8,8,128,1,float16,fp8,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,8,4,128,1,fp8,fp8,0,0.09147733449935913
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,8,1,128,1,fp8,fp8,0,0.06656000018119812
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,8,2,128,1,float16,float16,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,8,2,128,1,fp8,fp8,0,0.06619200110435486
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,8,2,128,1,float16,fp8,0,0.07031466563542683
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,8,4,128,1,float16,fp8,0,0.07202133536338806
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,8,4,128,1,float16,float16,0,0.07100266714890797
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,8,4,128,1,fp8,fp8,0,0.06655466556549072
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,8,1,128,1,float16,fp8,0,1.0129066308339436
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,8,1,128,1,float16,float16,0,1.0077760219573975
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,8,1,128,1,fp8,fp8,0,0.9760586420694987
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,8,2,128,1,fp8,fp8,0,0.9965226650238037
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,8,2,128,1,float16,float16,0,1.0173386732737224
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,8,2,128,1,float16,fp8,0,1.0227999687194824
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,8,4,128,1,float16,float16,0,1.032362699508667
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,8,4,128,1,float16,fp8,0,1.0497759977976482
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,8,1,128,1,float16,float16,0,0.5118559996287028
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,8,1,128,1,float16,fp8,0,0.5162719885508219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,8,8,128,1,float16,float16,0,0.5557119846343994
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,8,1,128,1,fp8,fp8,0,0.49987733364105225
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,8,8,128,1,float16,fp8,0,0.5587679942448934
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,8,2,128,1,float16,float16,0,0.5246239900588989
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,8,8,128,1,fp8,fp8,0,0.5508799950281779
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,8,4,128,1,fp8,fp8,0,1.021440029144287
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,8,2,128,1,float16,fp8,0,0.5190026760101318
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,8,2,128,1,fp8,fp8,0,0.5118240118026733
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,8,1,128,1,float16,float16,0,0.26948267221450806
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,8,1,128,1,float16,fp8,0,0.26981866359710693
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,8,4,128,1,float16,float16,0,0.53111465771993
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,8,4,128,1,float16,fp8,0,0.5325066645940145
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,8,8,128,1,fp8,fp8,0,0.2916959921518962
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,8,8,128,1,float16,fp8,0,0.2961066762606303
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,8,8,128,1,float16,float16,0,0.29543999830881756
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,8,4,128,1,fp8,fp8,0,0.5239253441492716
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,8,1,128,1,fp8,fp8,0,0.26638933022816974
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,8,2,128,1,float16,float16,0,0.27425599098205566
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,8,2,128,1,fp8,fp8,0,0.27084799607594806
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,8,2,128,1,float16,fp8,0,0.2749600013097127
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,8,4,128,1,float16,float16,0,0.2797279953956604
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,8,8,128,1,float16,float16,0,0.16247466206550598
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,8,4,128,1,float16,fp8,0,0.2821066578229268
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,8,8,128,1,float16,fp8,0,0.16554666558901468
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,8,1,128,1,float16,float16,0,0.14643200238545737
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,8,1,128,1,float16,fp8,0,0.1462613344192505
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,8,4,128,1,fp8,fp8,0,0.279039998849233
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,8,8,128,1,fp8,fp8,0,0.1634986698627472
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,8,1,128,1,fp8,fp8,0,0.14230400323867798
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,8,2,128,1,float16,float16,0,0.14899200201034546
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,8,2,128,1,float16,fp8,0,0.1493333379427592
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,8,2,128,1,fp8,fp8,0,0.14916266997655234
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,8,4,128,1,float16,float16,0,0.1565013329188029
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,8,8,128,1,float16,float16,0,0.0962666670481364
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,8,4,128,1,float16,fp8,0,0.1565013329188029
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,8,4,128,1,fp8,fp8,0,0.15597333510716757
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,8,1,128,1,float16,float16,0,0.09078933795293172
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,8,8,128,1,float16,fp8,0,0.0962559978167216
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,8,1,128,1,fp8,fp8,0,0.08741333087285359
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,8,2,128,1,float16,float16,0,0.09181867043177287
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,8,1,128,1,float16,fp8,0,0.09045333663622539
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,8,8,128,1,fp8,fp8,0,0.09659733374913533
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,8,2,128,1,float16,fp8,0,0.09216533104578654
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,8,2,128,1,fp8,fp8,0,0.08806932965914409
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,8,4,128,1,float16,float16,0,0.09250133236249287
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,8,8,128,1,float16,float16,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,8,4,128,1,fp8,fp8,0,0.08874666690826416
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,8,4,128,1,float16,fp8,0,0.09318400422732036
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,8,1,128,1,float16,float16,0,0.058687999844551086
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,8,8,128,1,float16,fp8,0,0.061103999614715576
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,8,8,128,1,fp8,fp8,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,8,1,128,1,fp8,fp8,0,0.054272000988324486
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,8,1,128,1,float16,fp8,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,8,2,128,1,float16,float16,0,0.05769066512584686
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,8,2,128,1,float16,fp8,0,0.05836800237496694
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,8,2,128,1,fp8,fp8,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,8,4,128,1,fp8,fp8,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,8,4,128,1,float16,fp8,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,8,4,128,1,float16,float16,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,8,8,128,1,float16,float16,0,0.05188799897829691
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,8,8,128,1,float16,fp8,0,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,8,1,128,1,fp8,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,8,8,128,1,fp8,fp8,0,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,8,1,128,1,float16,float16,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,8,2,128,1,float16,float16,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,8,2,128,1,fp8,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,8,1,128,1,float16,fp8,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,8,2,128,1,float16,fp8,0,0.05222400029500326
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,8,4,128,1,float16,fp8,0,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,8,4,128,1,float16,float16,0,0.05186133086681366
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,8,4,128,1,fp8,fp8,0,0.04950400193532308
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,8,1,128,1,float16,float16,0,0.7167840003967285
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,8,1,128,1,fp8,fp8,0,0.7120213508605957
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,8,1,128,1,float16,fp8,0,0.7185119787851969
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,8,2,128,1,float16,float16,0,0.7280639807383219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,8,2,128,1,float16,fp8,0,0.7311360041300455
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,8,2,128,1,fp8,fp8,0,0.724992036819458
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,8,4,128,1,float16,fp8,0,0.7413973013559977
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,8,4,128,1,float16,float16,0,0.7434240182240804
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,8,1,128,1,float16,float16,0,0.3667733271916707
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,8,1,128,1,float16,fp8,0,0.3671040137608846
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,8,8,128,1,float16,fp8,0,0.4010719855626424
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,8,8,128,1,float16,float16,0,0.4020906686782837
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,8,1,128,1,fp8,fp8,0,0.368127981821696
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,8,8,128,1,fp8,fp8,0,0.4072106679280599
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,8,2,128,1,float16,float16,0,0.37324798107147217
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,8,4,128,1,fp8,fp8,0,0.7369386355082194
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,8,2,128,1,float16,fp8,0,0.37461332480112713
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,8,2,128,1,fp8,fp8,0,0.3712000052134196
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,8,1,128,1,float16,float16,0,0.19490132729212442
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,8,8,128,1,float16,float16,0,0.21606934070587158
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,8,4,128,1,float16,fp8,0,0.38078399499257404
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,8,4,128,1,float16,float16,0,0.37836798032124835
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,8,1,128,1,float16,fp8,0,0.19472533464431763
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,8,8,128,1,fp8,fp8,0,0.21913599967956543
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,8,4,128,1,fp8,fp8,0,0.3824640115102132
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,8,8,128,1,float16,fp8,0,0.2194719910621643
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,8,1,128,1,fp8,fp8,0,0.19933867454528809
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,8,2,128,1,fp8,fp8,0,0.20241065820058188
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,8,2,128,1,float16,fp8,0,0.20018132527669272
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,8,2,128,1,float16,float16,0,0.20036800702412924
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,8,4,128,1,float16,float16,0,0.2041226625442505
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,8,8,128,1,float16,float16,0,0.12390933434168498
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,8,4,128,1,float16,fp8,0,0.2071839968363444
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,8,8,128,1,float16,fp8,0,0.12458667159080505
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,8,4,128,1,fp8,fp8,0,0.20736000935236612
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,8,1,128,1,float16,float16,0,0.10854933659235637
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,8,1,128,1,float16,fp8,0,0.10922132929166158
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,8,1,128,1,fp8,fp8,0,0.10752532879511516
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,8,2,128,1,float16,fp8,0,0.11195733149846394
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,8,8,128,1,fp8,fp8,0,0.1256160040696462
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,8,2,128,1,fp8,fp8,0,0.10955733060836792
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,8,2,128,1,float16,float16,0,0.11229866743087769
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,8,4,128,1,float16,float16,0,0.11401066184043884
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,8,1,128,1,float16,float16,0,0.07065066695213318
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,8,4,128,1,float16,fp8,0,0.1129813293615977
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,8,1,128,1,fp8,fp8,0,0.06929066777229309
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,8,1,128,1,float16,fp8,0,0.07204799850781758
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,8,4,128,1,fp8,fp8,0,0.11672533551851909
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,8,8,128,1,float16,float16,0,0.07372800012429555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,8,8,128,1,float16,fp8,0,0.07337599992752075
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,8,8,128,1,fp8,fp8,0,0.07341333230336507
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,8,2,128,1,float16,float16,0,0.07097599903742473
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,8,2,128,1,fp8,fp8,0,0.06963733335336049
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,8,2,128,1,float16,fp8,0,0.07031466563542683
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,8,4,128,1,float16,float16,0,0.07167466481526692
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,8,4,128,1,float16,fp8,0,0.07201600074768066
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,8,8,128,1,float16,float16,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,8,8,128,1,float16,fp8,0,0.05120000243186951
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,8,4,128,1,fp8,fp8,0,0.07065600156784058
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,8,1,128,1,float16,float16,0,0.0481333335240682
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,8,8,128,1,fp8,fp8,0,0.04916266600290934
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,8,1,128,1,fp8,fp8,0,0.04539200166861216
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,8,1,128,1,float16,fp8,0,0.049498667319615684
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,8,2,128,1,float16,float16,0,0.04779199759165446
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,8,2,128,1,fp8,fp8,0,0.047456001242001854
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,8,2,128,1,float16,fp8,0,0.05085866649945577
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,8,4,128,1,float16,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,8,4,128,1,float16,float16,0,0.05017599960168203
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,8,4,128,1,fp8,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,8,8,128,1,float16,float16,0,0.04337066908677419
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,8,8,128,1,fp8,fp8,0,0.041984001795450844
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,8,1,128,1,fp8,fp8,0,0.04061333338419596
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,8,8,128,1,float16,fp8,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,8,2,128,1,float16,float16,0,0.04437333345413208
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,8,1,128,1,float16,float16,0,0.04334400097529093
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,8,1,128,1,float16,fp8,0,0.04437333345413208
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,8,2,128,1,float16,fp8,0,0.044031997521718345
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,8,4,128,1,float16,float16,0,0.043706665436426796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,8,4,128,1,float16,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,8,2,128,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,8,4,128,1,fp8,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,8,1,128,1,float16,float16,0,0.7741386890411377
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,8,1,128,1,fp8,fp8,0,0.801637331644694
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,8,1,128,1,float16,fp8,0,0.7714133262634277
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,8,2,128,1,float16,float16,0,0.7854080200195312
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,8,2,128,1,float16,fp8,0,0.7796160380045573
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,8,2,128,1,fp8,fp8,0,0.8197066783905029
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,8,4,128,1,float16,fp8,0,0.7951359748840332
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,8,4,128,1,float16,float16,0,0.8122026920318604
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,8,1,128,1,float16,float16,0,0.39762667814890545
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,8,1,128,1,float16,fp8,0,0.39558935165405273
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,8,8,128,1,float16,float16,0,0.4336640040079753
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,8,1,128,1,fp8,fp8,0,0.4116479953130086
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,8,8,128,1,float16,fp8,0,0.4254719813664754
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,8,8,128,1,fp8,fp8,0,0.45278934637705487
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,8,2,128,1,float16,fp8,0,0.3983360131581624
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,8,4,128,1,fp8,fp8,0,0.912384033203125
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,8,2,128,1,float16,float16,0,0.404149333635966
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,8,2,128,1,fp8,fp8,0,0.42069868246714276
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,8,1,128,1,float16,fp8,0,0.21060800552368164
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,8,8,128,1,float16,float16,0,0.22971733411153158
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,8,1,128,1,float16,float16,0,0.21160000562667847
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,8,4,128,1,float16,float16,0,0.41437868277231854
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,8,4,128,1,float16,fp8,0,0.4089173475901286
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,8,8,128,1,float16,fp8,0,0.22561599810918173
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,8,4,128,1,fp8,fp8,0,0.4538026650746663
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,8,8,128,1,fp8,fp8,0,0.23756267627080283
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,8,1,128,1,fp8,fp8,0,0.2160586714744568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,8,2,128,1,float16,float16,0,0.21397332350413004
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,8,2,128,1,float16,fp8,0,0.2136746644973755
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,8,2,128,1,fp8,fp8,0,0.22118399540583292
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,8,4,128,1,float16,float16,0,0.22016000747680664
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,8,4,128,1,float16,fp8,0,0.21776533126831055
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,8,8,128,1,float16,float16,0,0.1283466617266337
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,8,4,128,1,fp8,fp8,0,0.233130673567454
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,8,1,128,1,float16,float16,0,0.11470400293668111
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,8,8,128,1,float16,fp8,0,0.1269760032494863
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,8,1,128,1,float16,fp8,0,0.11467732985814412
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,8,1,128,1,fp8,fp8,0,0.11674132943153381
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,8,8,128,1,fp8,fp8,0,0.13004799683888754
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,8,2,128,1,float16,float16,0,0.1160586675008138
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,8,2,128,1,float16,fp8,0,0.11502933502197266
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,8,2,128,1,fp8,fp8,0,0.11844799915949504
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,8,8,128,1,float16,float16,0,0.07339199880758922
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,8,8,128,1,float16,fp8,0,0.07168533404668172
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,8,8,128,1,fp8,fp8,0,0.07441066702206929
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,8,1,128,1,float16,float16,0,0.06858133276303609
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,8,4,128,1,float16,fp8,0,0.1181013286113739
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,8,4,128,1,float16,float16,0,0.12219732999801636
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,8,4,128,1,fp8,fp8,0,0.12595199545224509
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,8,1,128,1,float16,fp8,0,0.06791999936103821
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,8,1,128,1,fp8,fp8,0,0.06689600149790446
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,8,2,128,1,float16,fp8,0,0.06826133529345195
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,8,2,128,1,float16,float16,0,0.06997333467006683
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,8,8,128,1,float16,float16,0,0.04331733286380768
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,8,2,128,1,fp8,fp8,0,0.06791999936103821
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,8,4,128,1,fp8,fp8,0,0.07202666501204173
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,8,4,128,1,float16,float16,0,0.06963733335336049
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,8,4,128,1,float16,fp8,0,0.07030933101971944
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,8,1,128,1,float16,float16,0,0.040618665516376495
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,8,8,128,1,float16,fp8,0,0.04164800047874451
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,8,8,128,1,fp8,fp8,0,0.04167466859022776
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,8,1,128,1,float16,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,8,1,128,1,fp8,fp8,0,0.03958400090535482
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,8,2,128,1,float16,float16,0,0.04164266586303711
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,8,2,128,1,float16,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,8,4,128,1,float16,float16,0,0.04165866722663244
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,8,2,128,1,fp8,fp8,0,0.041637333730856575
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,8,4,128,1,float16,fp8,0,0.043007999658584595
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,8,4,128,1,fp8,fp8,0,0.041306667029857635
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,8,8,128,1,float16,float16,0,0.035504000882307686
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,8,1,128,1,float16,fp8,0,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,8,1,128,1,float16,float16,0,0.03311999887228012
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,8,8,128,1,float16,fp8,0,0.03583999971548716
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,8,8,128,1,fp8,fp8,0,0.03344533344109853
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,8,1,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,8,2,128,1,float16,float16,0,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,8,2,128,1,float16,fp8,0,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,8,2,128,1,fp8,fp8,0,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,8,4,128,1,float16,float16,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,8,4,128,1,float16,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,8,8,128,1,fp8,fp8,0,0.029696000119050343
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,8,8,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,8,4,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,8,8,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,8,1,128,1,fp8,fp8,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,8,1,128,1,float16,fp8,0,0.031034665803114574
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,8,2,128,1,float16,fp8,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,8,2,128,1,float16,float16,0,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,8,4,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,8,2,128,1,fp8,fp8,0,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,8,1,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,8,4,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,8,4,128,1,float16,float16,0,0.0310506671667099
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,8,1,128,1,float16,float16,0,0.679258664449056
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,8,1,128,1,float16,fp8,0,0.6737919648488363
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,8,1,128,1,fp8,fp8,0,0.7205546696980795
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,8,2,128,1,float16,float16,0,0.6850506464640299
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,8,2,128,1,float16,fp8,0,0.6830133597056071
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,8,2,128,1,fp8,fp8,0,0.7386506398518881
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,8,4,128,1,float16,float16,0,0.7068959871927897
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,8,4,128,1,float16,fp8,0,0.7028053601582845
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,8,1,128,1,float16,float16,0,0.3490133285522461
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,8,1,128,1,float16,fp8,0,0.3473066488901774
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,8,8,128,1,float16,float16,0,0.38417065143585205
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,8,1,128,1,fp8,fp8,0,0.37357866764068604
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,8,8,128,1,float16,fp8,0,0.37802668412526447
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,8,8,128,1,fp8,fp8,0,0.4102826515833537
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,8,2,128,1,float16,float16,0,0.3524266481399536
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,8,4,128,1,fp8,fp8,0,0.8285919825236002
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,8,2,128,1,fp8,fp8,0,0.37938666343688965
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,8,2,128,1,float16,fp8,0,0.3520853519439697
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,8,1,128,1,float16,float16,0,0.18637333313624063
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,8,1,128,1,float16,fp8,0,0.18517333269119263
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,8,4,128,1,float16,float16,0,0.3647093375523885
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,8,8,128,1,float16,float16,0,0.20428800582885742
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,8,8,128,1,float16,fp8,0,0.2005280057589213
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,8,4,128,1,float16,fp8,0,0.3619946638743083
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,8,4,128,1,fp8,fp8,0,0.4174506664276123
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,8,8,128,1,fp8,fp8,0,0.21811199188232422
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,8,1,128,1,fp8,fp8,0,0.1960960030555725
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,8,2,128,1,float16,float16,0,0.1879040002822876
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,8,2,128,1,fp8,fp8,0,0.2005386749903361
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,8,2,128,1,float16,fp8,0,0.1879040002822876
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,8,4,128,1,float16,float16,0,0.1938826640446981
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,8,4,128,1,float16,fp8,0,0.19338132937749228
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,8,8,128,1,float16,float16,0,0.11434666315714519
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,8,8,128,1,float16,fp8,0,0.11128000418345134
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,8,4,128,1,fp8,fp8,0,0.2136639952659607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,8,1,128,1,float16,float16,0,0.10068800052007039
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,8,1,128,1,float16,fp8,0,0.09966933727264404
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,8,8,128,1,fp8,fp8,0,0.11980799833933513
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,8,1,128,1,fp8,fp8,0,0.10478933652242024
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,8,2,128,1,float16,float16,0,0.10205866893132527
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,8,2,128,1,fp8,fp8,0,0.1088853379090627
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,8,2,128,1,float16,fp8,0,0.1013813316822052
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,8,4,128,1,float16,float16,0,0.10785599549611409
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,8,4,128,1,float16,fp8,0,0.10547733306884766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,8,4,128,1,fp8,fp8,0,0.11434132854143779
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,8,8,128,1,float16,float16,0,0.06176533301671346
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,8,1,128,1,float16,float16,0,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,8,8,128,1,float16,fp8,0,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,8,1,128,1,float16,fp8,0,0.056320001681645714
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,8,1,128,1,fp8,fp8,0,0.05973866581916809
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,8,2,128,1,float16,float16,0,0.05734399954477946
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,8,2,128,1,float16,fp8,0,0.05905599892139435
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,8,8,128,1,fp8,fp8,0,0.067930668592453
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,8,4,128,1,float16,float16,0,0.05870933334032694
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,8,2,128,1,fp8,fp8,0,0.06010133524735769
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,8,1,128,1,float16,float16,0,0.0365280012289683
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,8,8,128,1,float16,float16,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,8,8,128,1,fp8,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,8,4,128,1,float16,fp8,0,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,8,8,128,1,float16,fp8,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,8,4,128,1,fp8,fp8,0,0.06348800162474315
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,8,1,128,1,float16,fp8,0,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,8,2,128,1,float16,float16,0,0.03685333331425985
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,8,2,128,1,float16,fp8,0,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,8,1,128,1,fp8,fp8,0,0.03721066564321518
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,8,2,128,1,fp8,fp8,0,0.03719999889532725
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,8,4,128,1,float16,fp8,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,8,4,128,1,float16,float16,0,0.03721600025892258
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,8,8,128,1,float16,float16,0,0.029018667836983997
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,8,4,128,1,fp8,fp8,0,0.038917332887649536
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,8,1,128,1,float16,fp8,0,0.029370665550231934
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,8,1,128,1,fp8,fp8,0,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,8,8,128,1,float16,fp8,0,0.030720000465710957
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,8,2,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,8,8,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,8,1,128,1,float16,float16,0,0.027647999425729115
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,8,2,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,8,2,128,1,fp8,fp8,0,0.029338667790095013
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,8,4,128,1,float16,fp8,0,0.029018667836983997
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,8,4,128,1,float16,float16,0,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,8,8,128,1,float16,float16,0,0.025274666647116344
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,8,1,128,1,float16,fp8,0,0.025253333151340485
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,8,1,128,1,float16,float16,0,0.025253333151340485
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,8,4,128,1,fp8,fp8,0,0.02903466671705246
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,8,8,128,1,float16,fp8,0,0.02492800106604894
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,8,8,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,8,1,128,1,fp8,fp8,0,0.023215999205907185
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,8,2,128,1,float16,fp8,0,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,8,2,128,1,float16,float16,0,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,8,2,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,8,4,128,1,float16,float16,0,0.025600001215934753
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,8,4,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,8,4,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,8,8,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,8,8,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,8,1,128,1,float16,float16,0,0.023215999205907185
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,8,8,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,8,1,128,1,float16,fp8,0,0.023205332458019257
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,8,1,128,1,fp8,fp8,0,0.02457600086927414
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,8,4,128,1,float16,float16,0,0.023205332458019257
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,8,4,128,1,float16,fp8,0,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,8,2,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,8,2,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,8,4,128,1,fp8,fp8,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,8,2,128,1,fp8,fp8,0,0.023205332458019257
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,8,1,128,1,float16,float16,0,0.26641066869099933
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,8,1,128,1,float16,fp8,0,0.26470933357874554
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,8,1,128,1,fp8,fp8,0,0.27904532353083294
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,8,2,128,1,float16,float16,0,0.27698665857315063
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,8,2,128,1,float16,fp8,0,0.2739306688308716
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,8,2,128,1,fp8,fp8,0,0.28757333755493164
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,8,4,128,1,float16,float16,0,0.28722665707270306
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,8,4,128,1,float16,fp8,0,0.2879306674003601
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,8,1,128,1,float16,float16,0,0.1474560002485911
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,8,1,128,1,float16,fp8,0,0.14421332875887552
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,8,8,128,1,float16,fp8,0,0.16196266810099283
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,8,8,128,1,float16,float16,0,0.16657066345214844
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,8,8,128,1,fp8,fp8,0,0.17221333583196005
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,8,4,128,1,fp8,fp8,0,0.3131573398907979
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,8,1,128,1,fp8,fp8,0,0.15019200245539346
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,8,2,128,1,float16,float16,0,0.15205867091814676
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,8,2,128,1,float16,fp8,0,0.15034666657447815
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,8,2,128,1,fp8,fp8,0,0.15292800466219583
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,8,4,128,1,float16,float16,0,0.1551359991232554
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,8,8,128,1,float16,float16,0,0.09591466188430786
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,8,4,128,1,float16,fp8,0,0.15598932902018228
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,8,4,128,1,fp8,fp8,0,0.16503467162450156
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,8,8,128,1,float16,fp8,0,0.09385066231091817
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,8,1,128,1,float16,float16,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,8,1,128,1,float16,fp8,0,0.07850666840871175
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,8,8,128,1,fp8,fp8,0,0.09727999567985535
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,8,1,128,1,fp8,fp8,0,0.08055466910203297
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,8,2,128,1,float16,fp8,0,0.08089600006739299
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,8,2,128,1,float16,float16,0,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,8,2,128,1,fp8,fp8,0,0.0846506655216217
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,8,8,128,1,float16,float16,0,0.05120000243186951
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,8,4,128,1,float16,fp8,0,0.08703999718030293
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,8,4,128,1,fp8,fp8,0,0.09181333581606548
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,8,4,128,1,float16,float16,0,0.08669867118199666
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,8,1,128,1,float16,fp8,0,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,8,8,128,1,float16,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,8,1,128,1,float16,float16,0,0.04742933313051859
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,8,1,128,1,fp8,fp8,0,0.04744000236193339
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,8,8,128,1,fp8,fp8,0,0.053583999474843345
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,8,2,128,1,float16,float16,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,8,2,128,1,float16,fp8,0,0.048810665806134544
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,8,2,128,1,fp8,fp8,0,0.048810665806134544
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,8,4,128,1,float16,float16,0,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,8,8,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,8,4,128,1,float16,fp8,0,0.04775999983151754
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,8,1,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,8,4,128,1,fp8,fp8,0,0.05154666801293691
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,8,8,128,1,float16,fp8,0,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,8,1,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,8,1,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,8,2,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,8,8,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,8,2,128,1,float16,fp8,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,8,2,128,1,fp8,fp8,0,0.03107200066248576
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,8,1,128,1,float16,float16,0,0.024933333198229473
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,8,8,128,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,8,8,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,8,4,128,1,float16,float16,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,8,4,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,8,4,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,8,8,128,1,fp8,fp8,0,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,8,1,128,1,float16,fp8,0,0.02492800106604894
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,8,1,128,1,fp8,fp8,0,0.023221333821614582
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,8,2,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,8,2,128,1,float16,fp8,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,8,4,128,1,float16,fp8,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,8,4,128,1,float16,float16,0,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,8,2,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,8,4,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,8,8,128,1,float16,fp8,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,8,8,128,1,fp8,fp8,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,8,8,128,1,float16,float16,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,8,1,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,8,2,128,1,float16,fp8,0,0.02083733429511388
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,8,1,128,1,float16,fp8,0,0.021840001145998638
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,8,2,128,1,fp8,fp8,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,8,1,128,1,fp8,fp8,0,0.021157334248224895
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,8,4,128,1,float16,float16,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,8,2,128,1,float16,float16,0,0.021157334248224895
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,8,4,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,8,4,128,1,float16,fp8,0,0.022522665560245514
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,8,8,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,8,8,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,8,1,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,8,2,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,8,8,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,8,1,128,1,float16,float16,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,8,2,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,8,1,128,1,fp8,fp8,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,8,2,128,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,8,4,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,8,4,128,1,float16,fp8,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,8,4,128,1,fp8,fp8,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,8,8,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,8,8,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,8,8,128,1,float16,float16,0,0.019109333554903667
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,8,1,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,8,1,128,1,fp8,fp8,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,8,2,128,1,float16,float16,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,8,1,128,1,float16,float16,0,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,8,2,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,8,2,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,8,4,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,8,4,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,8,4,128,1,float16,fp8,0,0.020848001043001812
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,8,1,128,1,float16,float16,0,0.145578662554423
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,8,1,128,1,float16,fp8,0,0.14677332838376364
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,8,1,128,1,fp8,fp8,0,0.154448002576828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,8,2,128,1,float16,float16,0,0.1532586713631948
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,8,2,128,1,fp8,fp8,0,0.15803200006484985
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,8,2,128,1,float16,fp8,0,0.1493333379427592
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,8,4,128,1,float16,float16,0,0.1616266667842865
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,8,4,128,1,float16,fp8,0,0.15753600001335144
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,8,8,128,1,float16,float16,0,0.09557867050170898
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,8,1,128,1,float16,float16,0,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,8,8,128,1,float16,fp8,0,0.09046399593353271
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,8,1,128,1,float16,fp8,0,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,8,8,128,1,fp8,fp8,0,0.09830400347709656
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,8,4,128,1,fp8,fp8,0,0.169813334941864
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,8,1,128,1,fp8,fp8,0,0.0846560001373291
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,8,2,128,1,float16,float16,0,0.08328533172607422
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,8,2,128,1,float16,fp8,0,0.08192533254623413
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,8,2,128,1,fp8,fp8,0,0.08840533097585042
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,8,8,128,1,float16,float16,0,0.05120000243186951
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,8,4,128,1,fp8,fp8,0,0.09386666615804036
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,8,4,128,1,float16,float16,0,0.08942932883898418
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,8,4,128,1,float16,fp8,0,0.08635733524958293
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,8,8,128,1,float16,fp8,0,0.049829334020614624
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,8,8,128,1,fp8,fp8,0,0.057002668579419456
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,8,1,128,1,float16,float16,0,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,8,1,128,1,float16,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,8,1,128,1,fp8,fp8,0,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,8,2,128,1,float16,float16,0,0.049125333627065025
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,8,2,128,1,float16,fp8,0,0.04880533119042715
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,8,2,128,1,fp8,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,8,4,128,1,float16,float16,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,8,8,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,8,8,128,1,float16,fp8,0,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,8,4,128,1,fp8,fp8,0,0.05359466870625814
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,8,8,128,1,fp8,fp8,0,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,8,4,128,1,float16,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,8,1,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,8,1,128,1,float16,fp8,0,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,8,1,128,1,fp8,fp8,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,8,2,128,1,float16,float16,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,8,2,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,8,2,128,1,fp8,fp8,0,0.0330826664964358
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,8,4,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,8,1,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,8,4,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,8,8,128,1,fp8,fp8,0,0.02288000037272771
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,8,4,128,1,fp8,fp8,0,0.03516799956560135
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,8,8,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,8,8,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,8,1,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,8,1,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,8,2,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,8,2,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,8,4,128,1,fp8,fp8,0,0.023232000569502514
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,8,8,128,1,float16,float16,0,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,8,8,128,1,float16,fp8,0,0.0194560003777345
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,8,4,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,8,4,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,8,2,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,8,8,128,1,fp8,fp8,0,0.018751999984184902
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,8,1,128,1,float16,float16,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,8,1,128,1,float16,fp8,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,8,1,128,1,fp8,fp8,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,8,2,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,8,2,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,8,4,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,8,2,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,8,4,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,8,4,128,1,float16,float16,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,8,8,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,8,8,128,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,8,8,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,8,1,128,1,float16,fp8,0,0.01741333305835724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,8,1,128,1,float16,float16,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,8,2,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,8,1,128,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,8,2,128,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,8,4,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,8,4,128,1,float16,fp8,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,8,4,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,8,2,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,8,8,128,1,float16,fp8,0,0.01876266673207283
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,8,1,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,8,8,128,1,float16,float16,0,0.017071999609470367
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,8,1,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,8,1,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,8,2,128,1,float16,float16,0,0.016735999534527462
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,8,8,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,8,2,128,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,8,4,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,8,2,128,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,8,8,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,8,4,128,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,8,8,128,1,float16,float16,0,0.017071999609470367
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,8,4,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,8,8,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,8,1,128,1,float16,float16,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,8,2,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,8,1,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,8,2,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,8,2,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,8,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,8,4,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,8,4,128,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,8,4,128,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,8,1,128,1,float16,float16,0,0.10308800141016643
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,8,1,128,1,float16,fp8,0,0.10273599624633789
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,8,1,128,1,fp8,fp8,0,0.11028266946474712
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,8,2,128,1,float16,float16,0,0.10513066252072652
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,8,2,128,1,fp8,fp8,0,0.1129866639773051
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,8,2,128,1,float16,fp8,0,0.10341866811116536
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,8,4,128,1,float16,float16,0,0.10889066259066264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,8,4,128,1,float16,fp8,0,0.10820266604423523
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,8,8,128,1,float16,fp8,0,0.06144000093142191
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,8,8,128,1,float16,float16,0,0.0634933312733968
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,8,8,128,1,fp8,fp8,0,0.06894933183987935
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,8,4,128,1,fp8,fp8,0,0.1204906702041626
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,8,1,128,1,float16,float16,0,0.05870933334032694
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,8,1,128,1,fp8,fp8,0,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,8,1,128,1,float16,fp8,0,0.05972800155480703
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,8,2,128,1,float16,float16,0,0.05973866581916809
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,8,2,128,1,float16,fp8,0,0.058101331194241844
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,8,2,128,1,fp8,fp8,0,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,8,4,128,1,float16,float16,0,0.06075199941794077
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,8,4,128,1,float16,fp8,0,0.06041066845258077
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,8,8,128,1,float16,float16,0,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,8,1,128,1,float16,float16,0,0.03755199909210205
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,8,8,128,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,8,4,128,1,fp8,fp8,0,0.06585066517194112
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,8,1,128,1,float16,fp8,0,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,8,8,128,1,float16,fp8,0,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,8,1,128,1,fp8,fp8,0,0.039936001102129616
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,8,2,128,1,float16,float16,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,8,2,128,1,fp8,fp8,0,0.04061333338419596
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,8,2,128,1,float16,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,8,4,128,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,8,8,128,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,8,4,128,1,float16,float16,0,0.03925866633653641
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,8,8,128,1,fp8,fp8,0,0.027642667293548584
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,8,4,128,1,float16,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,8,8,128,1,float16,float16,0,0.02697066714366277
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,8,1,128,1,float16,float16,0,0.0266239990790685
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,8,1,128,1,fp8,fp8,0,0.02697066714366277
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,8,2,128,1,float16,float16,0,0.025941332181294758
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,8,2,128,1,fp8,fp8,0,0.0262773334980011
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,8,1,128,1,float16,fp8,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,8,2,128,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,8,4,128,1,float16,float16,0,0.02595199892918269
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,8,4,128,1,float16,fp8,0,0.02661866694688797
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,8,4,128,1,fp8,fp8,0,0.027301333844661713
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,8,8,128,1,float16,float16,0,0.019120000302791595
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,8,8,128,1,float16,fp8,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,8,8,128,1,fp8,fp8,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,8,1,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,8,1,128,1,float16,fp8,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,8,2,128,1,float16,fp8,0,0.019109333554903667
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,8,2,128,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,8,4,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,8,1,128,1,fp8,fp8,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,8,4,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,8,2,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,8,4,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,8,8,128,1,float16,float16,0,0.01741333305835724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,8,8,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,8,8,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,8,1,128,1,float16,float16,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,8,1,128,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,8,1,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,8,2,128,1,float16,float16,0,0.016741332908471424
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,8,2,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,8,4,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,8,4,128,1,fp8,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,8,4,128,1,float16,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,8,2,128,1,fp8,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,8,8,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,8,8,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,8,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,8,1,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,8,1,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,8,2,128,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,8,2,128,1,float16,float16,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,8,8,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,8,2,128,1,float16,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,8,4,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,8,4,128,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,8,8,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,8,8,128,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,8,8,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,8,4,128,1,float16,float16,0,0.016751999656359356
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,8,1,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,8,1,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,8,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,8,2,128,1,float16,float16,0,0.016741332908471424
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,8,2,128,1,float16,fp8,0,0.01674666628241539
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,8,2,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,8,4,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,8,4,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,8,8,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,8,8,128,1,fp8,fp8,0,0.016693333784739178
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,8,1,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,8,1,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,8,8,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,8,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,8,2,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,8,2,128,1,float16,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,8,4,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,8,2,128,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,8,4,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,8,4,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,8,4,128,1,fp8,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,8,1,128,1,float16,fp8,0,0.08429333567619324
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,8,1,128,1,float16,float16,0,0.08495466907819112
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,8,1,128,1,fp8,fp8,0,0.09045333663622539
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,8,2,128,1,float16,float16,0,0.08534399668375652
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,8,2,128,1,float16,fp8,0,0.08258666594823201
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,8,4,128,1,float16,float16,0,0.08397333820660909
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,8,4,128,1,float16,fp8,0,0.08532266815503438
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,8,2,128,1,fp8,fp8,0,0.09045333663622539
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,8,8,128,1,float16,float16,0,0.05222400029500326
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,8,1,128,1,float16,fp8,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,8,4,128,1,fp8,fp8,0,0.09386133154233296
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,8,8,128,1,float16,fp8,0,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,8,1,128,1,float16,float16,0,0.05156266689300537
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,8,8,128,1,fp8,fp8,0,0.05597866574923197
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,8,2,128,1,fp8,fp8,0,0.05529599885145823
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,8,2,128,1,float16,fp8,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,8,1,128,1,fp8,fp8,0,0.05359466870625814
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,8,8,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,8,4,128,1,float16,float16,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,8,4,128,1,fp8,fp8,0,0.05597866574923197
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,8,4,128,1,float16,fp8,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,8,2,128,1,float16,float16,0,0.05120000243186951
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,8,1,128,1,float16,fp8,0,0.0317493329445521
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,8,8,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,8,1,128,1,fp8,fp8,0,0.033786666889985405
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,8,2,128,1,float16,float16,0,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,8,1,128,1,float16,float16,0,0.03207999964555105
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,8,8,128,1,fp8,fp8,0,0.034474665919939675
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,8,4,128,1,float16,float16,0,0.03276800115903219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,8,8,128,1,float16,float16,0,0.023200000325838726
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,8,4,128,1,float16,fp8,0,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,8,4,128,1,fp8,fp8,0,0.034474665919939675
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,8,8,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,8,2,128,1,float16,fp8,0,0.03242666771014532
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,8,1,128,1,float16,float16,0,0.02219199885924657
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,8,1,128,1,float16,fp8,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,8,2,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,8,1,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,8,2,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,8,2,128,1,float16,fp8,0,0.023178666830062866
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,8,8,128,1,fp8,fp8,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,8,4,128,1,float16,float16,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,8,4,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,8,8,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,8,8,128,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,8,2,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,8,1,128,1,float16,fp8,0,0.01844266677896182
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,8,1,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,8,8,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,8,2,128,1,float16,float16,0,0.01809599995613098
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,8,2,128,1,float16,fp8,0,0.01674666628241539
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,8,1,128,1,float16,float16,0,0.017093333105246227
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,8,4,128,1,fp8,fp8,0,0.02457600086927414
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,8,4,128,1,float16,float16,0,0.01809599995613098
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,8,4,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,8,2,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,8,4,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,8,8,128,1,float16,float16,0,0.016714667280515034
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,8,8,128,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,8,1,128,1,float16,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,8,1,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,8,2,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,8,8,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,8,2,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,8,4,128,1,float16,float16,0,0.016751999656359356
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,8,2,128,1,float16,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,8,1,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,8,4,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,8,4,128,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,8,8,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,8,8,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,8,1,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,8,8,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,8,1,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,8,2,128,1,float16,float16,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,8,2,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,8,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,8,2,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,8,4,128,1,float16,float16,0,0.01570133368174235
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,8,4,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,8,8,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,8,8,128,1,fp8,fp8,0,0.016704000532627106
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,8,1,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,8,8,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,8,4,128,1,fp8,fp8,0,0.016384000579516094
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,8,1,128,1,fp8,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,8,2,128,1,float16,float16,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,8,1,128,1,float16,float16,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,8,2,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,8,2,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,8,4,128,1,float16,float16,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,8,8,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,8,8,128,1,float16,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,8,4,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,8,1,128,1,float16,float16,0,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,8,1,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,8,4,128,1,fp8,fp8,0,0.016042667130629223
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,8,8,128,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,8,1,128,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,8,2,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,8,2,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,8,4,128,1,float16,float16,0,0.014991999914248785
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,8,4,128,1,float16,fp8,0,0.016735999534527462
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,8,2,128,1,fp8,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,8,4,128,1,fp8,fp8,0,0.016037333756685257
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,8,1,128,1,float16,fp8,0,0.07506666580835979
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,8,1,128,1,fp8,fp8,0,0.08227199812730153
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,8,1,128,1,float16,float16,0,0.07543466488520305
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,8,2,128,1,float16,float16,0,0.07645866771539052
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,8,2,128,1,float16,fp8,0,0.07543999950091045
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,8,2,128,1,fp8,fp8,0,0.08225599924723308
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,8,8,128,1,float16,float16,0,0.04572799801826477
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,8,1,128,1,float16,float16,0,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,8,1,128,1,float16,fp8,0,0.04539200166861216
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,8,4,128,1,fp8,fp8,0,0.08362133304278056
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,8,4,128,1,float16,float16,0,0.07679466903209686
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,8,4,128,1,float16,fp8,0,0.0771679977575938
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,8,8,128,1,fp8,fp8,0,0.05020266771316528
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,8,8,128,1,float16,fp8,0,0.046762665112813316
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,8,1,128,1,fp8,fp8,0,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,8,2,128,1,float16,float16,0,0.04642133414745331
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,8,2,128,1,float16,fp8,0,0.04540266593297323
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,8,2,128,1,fp8,fp8,0,0.0481279989083608
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,8,4,128,1,fp8,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,8,8,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,8,4,128,1,float16,float16,0,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,8,4,128,1,float16,fp8,0,0.04642133414745331
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,8,8,128,1,float16,fp8,0,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,8,1,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,8,8,128,1,fp8,fp8,0,0.031744000812371574
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,8,1,128,1,float16,fp8,0,0.03107733279466629
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,8,1,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,8,4,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,8,2,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,8,4,128,1,float16,fp8,0,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,8,4,128,1,fp8,fp8,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,8,8,128,1,float16,float16,0,0.02218666672706604
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,8,2,128,1,float16,fp8,0,0.029696000119050343
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,8,2,128,1,fp8,fp8,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,8,8,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,8,8,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,8,1,128,1,float16,fp8,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,8,1,128,1,fp8,fp8,0,0.023183998962243397
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,8,2,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,8,1,128,1,float16,float16,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,8,4,128,1,float16,float16,0,0.021520001192887623
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,8,2,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,8,2,128,1,float16,fp8,0,0.021183999876181286
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,8,4,128,1,float16,fp8,0,0.022890667120615642
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,8,4,128,1,fp8,fp8,0,0.022858666876951855
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,8,8,128,1,float16,float16,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,8,8,128,1,float16,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,8,8,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,8,1,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,8,2,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,8,2,128,1,float16,float16,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,8,2,128,1,fp8,fp8,0,0.016783999900023144
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,8,1,128,1,float16,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,8,1,128,1,fp8,fp8,0,0.017749333133300144
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,8,8,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,8,4,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,8,8,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,8,4,128,1,float16,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,8,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,8,4,128,1,float16,float16,0,0.016735999534527462
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,8,8,128,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,8,1,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,8,1,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,8,2,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,8,2,128,1,float16,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,8,4,128,1,float16,float16,0,0.016384000579516094
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,8,4,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,8,2,128,1,fp8,fp8,0,0.01669866715868314
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,8,1,128,1,float16,float16,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,8,1,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,8,8,128,1,fp8,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,8,8,128,1,float16,float16,0,0.01669866715868314
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,8,4,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,8,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,8,8,128,1,float16,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,8,2,128,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,8,2,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,8,2,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,8,4,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,8,8,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,8,4,128,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,8,4,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,8,8,128,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,8,1,128,1,float16,float16,0,0.015360000232855478
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,8,8,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,8,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,8,2,128,1,float16,float16,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,8,1,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,8,4,128,1,float16,float16,0,0.015717333803574245
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,8,2,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,8,4,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,8,2,128,1,fp8,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,8,8,128,1,float16,float16,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,8,8,128,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,8,4,128,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,8,1,128,1,float16,float16,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,8,8,128,1,fp8,fp8,0,0.016042667130629223
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,8,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,8,1,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,8,2,128,1,float16,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,8,2,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,8,2,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,8,4,128,1,float16,fp8,0,0.01570133368174235
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,8,4,128,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,8,4,128,1,float16,float16,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,4,1,128,1,float16,fp8,0,1.869823932647705
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,4,1,128,1,fp8,fp8,0,1.6279840469360352
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,4,1,128,1,float16,float16,0,1.8684800465901692
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,4,2,128,1,float16,float16,0,1.885701338450114
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,4,4,128,1,float16,fp8,0,1.039189338684082
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,4,2,128,1,fp8,fp8,0,1.6416373252868652
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16384,4,2,128,1,float16,fp8,0,1.8800692558288574
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,4,4,128,1,float16,float16,0,1.0306560198465984
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,4,1,128,1,float16,float16,0,0.9965173403422037
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,4,1,128,1,float16,fp8,0,1.0002773602803547
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,4,1,128,1,fp8,fp8,0,0.8912213643391927
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,4,4,128,1,float16,float16,0,0.5871040026346842
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,4,4,128,1,fp8,fp8,0,0.9065813223520914
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,4,2,128,1,float16,float16,0,1.0190613269805908
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,4,2,128,1,float16,fp8,0,1.0091466903686523
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16384,4,2,128,1,fp8,fp8,0,0.8956586519877116
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,4,4,128,1,float16,fp8,0,0.5857280095418295
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,4,1,128,1,float16,float16,0,0.5676426490147909
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,4,1,128,1,fp8,fp8,0,0.516266663869222
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,4,1,128,1,float16,fp8,0,0.5730986595153809
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,4,4,128,1,fp8,fp8,0,0.5304319858551025
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,4,2,128,1,float16,fp8,0,0.5758293469746908
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,4,2,128,1,float16,float16,0,0.5768533150355021
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16384,4,2,128,1,fp8,fp8,0,0.5225919882456461
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,4,4,128,1,float16,fp8,0,0.368127981821696
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,4,4,128,1,float16,float16,0,0.3677599827448527
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,4,1,128,1,float16,fp8,0,0.3657386700312297
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,4,1,128,1,float16,float16,0,0.3643733263015747
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,4,1,128,1,fp8,fp8,0,0.334330677986145
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,4,4,128,1,fp8,fp8,0,0.33874134222666424
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,4,2,128,1,float16,float16,0,0.3633493185043335
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,4,2,128,1,float16,fp8,0,0.3667626778284709
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16384,4,2,128,1,fp8,fp8,0,0.33364800612131756
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,4,1,128,1,float16,float16,0,1.1272532939910889
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,4,1,128,1,float16,fp8,0,1.1299839814503987
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,4,1,128,1,fp8,fp8,0,1.0009600321451824
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,4,2,128,1,float16,float16,0,1.1340800126393635
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,4,4,128,1,float16,float16,0,0.6340320110321045
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,4,4,128,1,float16,fp8,0,0.6384640137354533
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,4,2,128,1,float16,fp8,0,1.1426186561584473
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,12288,4,2,128,1,fp8,fp8,0,1.0135839780171711
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,4,1,128,1,float16,float16,0,0.614741325378418
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,4,1,128,1,float16,fp8,0,0.6205546855926514
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,4,1,128,1,fp8,fp8,0,0.5577333370844523
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,4,4,128,1,fp8,fp8,0,0.5761866569519043
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,4,2,128,1,float16,float16,0,0.6214026610056559
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,4,2,128,1,float16,fp8,0,0.6234453519185384
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,12288,4,2,128,1,fp8,fp8,0,0.5611519813537598
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,4,4,128,1,float16,fp8,0,0.3742773135503133
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,4,1,128,1,float16,float16,0,0.3572106758753459
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,4,4,128,1,float16,float16,0,0.3725653489430745
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,4,4,128,1,fp8,fp8,0,0.342522660891215
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,4,1,128,1,float16,fp8,0,0.3592533270517985
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,4,1,128,1,fp8,fp8,0,0.3285280068715413
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,4,2,128,1,float16,float16,0,0.36300798257191974
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,4,2,128,1,float16,fp8,0,0.3643999894460042
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,4,1,128,1,float16,float16,0,0.2389226754506429
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,4,4,128,1,float16,float16,0,0.24064532915751138
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,4,1,128,1,float16,fp8,0,0.23995733261108398
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,4,4,128,1,fp8,fp8,0,0.222543994585673
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,12288,4,2,128,1,fp8,fp8,0,0.33604268232981366
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,4,1,128,1,fp8,fp8,0,0.22152000665664673
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,4,2,128,1,float16,float16,0,0.2392746607462565
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,4,2,128,1,float16,fp8,0,0.24234666426976523
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,4,4,128,1,float16,fp8,0,0.244053324063619
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,12288,4,2,128,1,fp8,fp8,0,0.2225653330485026
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,4,1,128,1,float16,float16,0,0.8262080351511637
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,4,1,128,1,float16,fp8,0,0.8342239856719971
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,4,1,128,1,fp8,fp8,0,0.7406933307647705
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,4,2,128,1,float16,float16,0,0.8372960090637207
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,4,4,128,1,float16,fp8,0,0.4763306776682536
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,4,2,128,1,fp8,fp8,0,0.7516160011291504
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,10240,4,2,128,1,float16,fp8,0,0.8396586577097574
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,4,4,128,1,float16,float16,0,0.4763306776682536
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,4,1,128,1,float16,float16,0,0.4575519959131877
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,4,1,128,1,float16,fp8,0,0.4561920166015625
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,4,4,128,1,fp8,fp8,0,0.4350293477376302
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,4,1,128,1,fp8,fp8,0,0.41999999682108563
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,4,2,128,1,float16,float16,0,0.46199464797973633
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,4,2,128,1,float16,fp8,0,0.46506667137145996
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,10240,4,2,128,1,fp8,fp8,0,0.4244426488876343
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,4,4,128,1,float16,fp8,0,0.29133333762486774
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,4,1,128,1,float16,float16,0,0.2821066578229268
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,4,4,128,1,fp8,fp8,0,0.2711893320083618
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,4,4,128,1,float16,float16,0,0.28962133328119916
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,4,1,128,1,float16,fp8,0,0.28142933050791424
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,4,1,128,1,fp8,fp8,0,0.25702399015426636
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,4,2,128,1,float16,float16,0,0.2834773262341817
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,4,2,128,1,float16,fp8,0,0.2841493288675944
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,4,4,128,1,float16,float16,0,0.18311466773351034
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,4,1,128,1,float16,float16,0,0.17919999361038208
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,4,4,128,1,float16,fp8,0,0.1855093240737915
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,10240,4,2,128,1,fp8,fp8,0,0.2637066642443339
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,4,1,128,1,float16,fp8,0,0.18107199668884277
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,4,1,128,1,fp8,fp8,0,0.16674133141835532
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,4,2,128,1,fp8,fp8,0,0.16778133312861124
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,4,4,128,1,fp8,fp8,0,0.17068799336751303
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,4,2,128,1,float16,fp8,0,0.18244266510009766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,10240,4,2,128,1,float16,float16,0,0.18039466937383017
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,4,1,128,1,float16,float16,0,1.0774239699045818
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,4,1,128,1,float16,fp8,0,1.085978666941325
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,4,1,128,1,fp8,fp8,0,0.966485341389974
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,4,4,128,1,float16,float16,0,0.593231995900472
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,4,4,128,1,float16,fp8,0,0.5993760029474894
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,4,2,128,1,float16,float16,0,1.0852693716684978
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,4,2,128,1,float16,fp8,0,1.0948533217112224
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,8192,4,2,128,1,fp8,fp8,0,0.97979736328125
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,4,1,128,1,float16,float16,0,0.5751519997914633
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,4,1,128,1,float16,fp8,0,0.5720746517181396
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,4,1,128,1,fp8,fp8,0,0.5229173501332601
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,4,4,128,1,fp8,fp8,0,0.5440853436787924
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,4,2,128,1,float16,float16,0,0.5778773228327433
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,4,2,128,1,float16,fp8,0,0.5806133349736532
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,8192,4,2,128,1,fp8,fp8,0,0.5290666818618774
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,4,4,128,1,float16,fp8,0,0.3431946833928426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,4,1,128,1,float16,float16,0,0.3240799903869629
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,4,4,128,1,fp8,fp8,0,0.31488533814748126
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,4,1,128,1,float16,fp8,0,0.3251146674156189
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,4,1,128,1,fp8,fp8,0,0.2995199958483378
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,4,4,128,1,float16,float16,0,0.3384373188018799
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,4,2,128,1,float16,float16,0,0.32922132809956867
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,4,2,128,1,float16,fp8,0,0.33021867275238037
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,4,1,128,1,float16,float16,0,0.20753065745035806
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,4,4,128,1,float16,float16,0,0.21026132504145303
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,4,4,128,1,float16,fp8,0,0.21230934063593546
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,8192,4,2,128,1,fp8,fp8,0,0.3063466747601827
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,4,4,128,1,fp8,fp8,0,0.19746132691701254
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,4,1,128,1,float16,fp8,0,0.20718934138615927
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,4,1,128,1,fp8,fp8,0,0.19063466787338257
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,4,4,128,1,float16,float16,0,0.13739200433095297
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,4,4,128,1,float16,fp8,0,0.1397546629110972
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,4,2,128,1,float16,float16,0,0.20684266090393066
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,4,2,128,1,float16,fp8,0,0.20889067649841309
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,4,4,128,1,fp8,fp8,0,0.13329066832860312
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,8192,4,2,128,1,fp8,fp8,0,0.1925119956334432
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,4,1,128,1,float16,fp8,0,0.13738133509953818
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,4,1,128,1,fp8,fp8,0,0.1276639997959137
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,4,2,128,1,float16,float16,0,0.13753599921862283
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,4,1,128,1,float16,float16,0,0.13618666927019754
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,4,2,128,1,float16,fp8,0,0.13772799571355185
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,8192,4,2,128,1,fp8,fp8,0,0.1293653349081675
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,4,1,128,1,float16,fp8,0,0.6761813163757324
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,4,1,128,1,float16,float16,0,0.67413330078125
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,4,1,128,1,fp8,fp8,0,0.6164533297220866
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,4,4,128,1,float16,float16,0,0.3824640115102132
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,4,4,128,1,float16,fp8,0,0.38894931475321454
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,4,2,128,1,float16,fp8,0,0.6908586819966634
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,4,2,128,1,float16,float16,0,0.6813066800435384
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,6144,4,2,128,1,fp8,fp8,0,0.6299306551615397
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,4,1,128,1,float16,float16,0,0.3653973340988159
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,4,1,128,1,float16,fp8,0,0.3670986493428548
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,4,4,128,1,fp8,fp8,0,0.3595893383026123
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,4,1,128,1,fp8,fp8,0,0.3408213456471761
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,4,2,128,1,float16,fp8,0,0.37563733259836835
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,4,2,128,1,fp8,fp8,0,0.3466240167617798
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,6144,4,2,128,1,float16,float16,0,0.3715466658274333
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,4,4,128,1,float16,fp8,0,0.22732800245285034
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,4,1,128,1,float16,float16,0,0.21505065759023032
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,4,1,128,1,float16,fp8,0,0.21504000822703043
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,4,4,128,1,fp8,fp8,0,0.2112906575202942
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,4,4,128,1,float16,float16,0,0.22664533058802286
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,4,1,128,1,fp8,fp8,0,0.1974666714668274
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,4,2,128,1,float16,float16,0,0.21778666973114014
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,4,4,128,1,float16,fp8,0,0.14404267072677612
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,4,4,128,1,float16,float16,0,0.14385066429773966
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,4,2,128,1,float16,fp8,0,0.21777600049972534
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,4,4,128,1,fp8,fp8,0,0.13346133629480997
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,6144,4,2,128,1,fp8,fp8,0,0.20514132579167685
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,4,1,128,1,float16,fp8,0,0.14114133516947427
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,4,1,128,1,float16,float16,0,0.13994133472442627
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,4,1,128,1,fp8,fp8,0,0.13037332892417908
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,4,2,128,1,float16,float16,0,0.14097066720326742
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,4,2,128,1,float16,fp8,0,0.1421440045038859
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,4,4,128,1,float16,float16,0,0.10786133011182149
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,4,4,128,1,float16,fp8,0,0.10753599802652995
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,6144,4,2,128,1,fp8,fp8,0,0.1302186648050944
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,4,1,128,1,fp8,fp8,0,0.0993280013402303
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,4,1,128,1,float16,fp8,0,0.10717866818110149
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,4,2,128,1,float16,fp8,0,0.10786133011182149
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,4,2,128,1,float16,float16,0,0.10714667042096455
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,4,2,128,1,fp8,fp8,0,0.10069333513577779
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,4,4,128,1,fp8,fp8,0,0.10274133086204529
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,6144,4,1,128,1,float16,float16,0,0.106495996316274
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,4,1,128,1,float16,float16,0,0.6939253012339274
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,4,1,128,1,float16,fp8,0,0.6963199774424235
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,4,1,128,1,fp8,fp8,0,0.6476906538009644
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,4,2,128,1,float16,fp8,0,0.7086079915364584
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,4,4,128,1,float16,float16,0,0.3925333420435588
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,4,4,128,1,float16,fp8,0,0.39561065038045246
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,4,2,128,1,fp8,fp8,0,0.6608213186264038
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,4096,4,2,128,1,float16,float16,0,0.7052000363667806
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,4,1,128,1,float16,float16,0,0.3667786518732707
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,4,1,128,1,float16,fp8,0,0.3671040137608846
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,4,4,128,1,fp8,fp8,0,0.36881065368652344
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,4,1,128,1,fp8,fp8,0,0.34593598047892254
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,4,4,128,1,float16,float16,0,0.22049599885940552
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,4,2,128,1,fp8,fp8,0,0.35516266028086346
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,4,2,128,1,float16,float16,0,0.3779946565628052
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,4,4,128,1,float16,fp8,0,0.22357332706451416
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,4096,4,2,128,1,float16,fp8,0,0.3800746599833171
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,4,4,128,1,fp8,fp8,0,0.20990933974583945
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,4,1,128,1,float16,float16,0,0.20787199338277182
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,4,1,128,1,float16,fp8,0,0.20889600118001303
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,4,1,128,1,fp8,fp8,0,0.1976319948832194
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,4,2,128,1,float16,float16,0,0.2146986722946167
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,4,2,128,1,float16,fp8,0,0.21640533208847046
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,4,1,128,1,float16,fp8,0,0.1302186648050944
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,4,1,128,1,float16,float16,0,0.13157866398493448
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,4,4,128,1,fp8,fp8,0,0.12902933359146118
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,4096,4,2,128,1,fp8,fp8,0,0.20308266083399454
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,4,4,128,1,float16,fp8,0,0.13704533378283182
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,4,1,128,1,fp8,fp8,0,0.12014933427174886
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,4,4,128,1,float16,float16,0,0.13636266191800436
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,4,2,128,1,float16,float16,0,0.13124799728393555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,4,2,128,1,float16,fp8,0,0.13294933239618936
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,4,4,128,1,float16,float16,0,0.08396800359090169
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,4,4,128,1,float16,fp8,0,0.0846506655216217
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,4096,4,2,128,1,fp8,fp8,0,0.12287466724713643
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,4,1,128,1,float16,float16,0,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,4,4,128,1,fp8,fp8,0,0.07884799937407176
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,4,1,128,1,fp8,fp8,0,0.07679999868075053
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,4,2,128,1,float16,float16,0,0.08296533425649007
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,4,2,128,1,float16,fp8,0,0.08430932958920796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,4,2,128,1,fp8,fp8,0,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,4096,4,1,128,1,float16,fp8,0,0.08363200227419536
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,4,1,128,1,float16,float16,0,0.07645866771539052
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,4,4,128,1,float16,fp8,0,0.07645866771539052
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,4,1,128,1,float16,fp8,0,0.07646400233109792
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,4,4,128,1,float16,float16,0,0.07647466659545898
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,4,4,128,1,fp8,fp8,0,0.07235733171304067
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,4,1,128,1,fp8,fp8,0,0.07202666501204173
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,4,2,128,1,float16,float16,0,0.07679999868075053
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,4,2,128,1,fp8,fp8,0,0.07200533151626587
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,4096,4,2,128,1,float16,fp8,0,0.07645866771539052
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,4,1,128,1,float16,float16,0,0.46609067916870117
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,4,1,128,1,float16,fp8,0,0.46916266282399494
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,4,1,128,1,fp8,fp8,0,0.4466400146484375
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,4,4,128,1,float16,float16,0,0.2667520046234131
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,4,2,128,1,float16,fp8,0,0.47838401794433594
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,4,2,128,1,float16,float16,0,0.47766931851704914
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,3072,4,2,128,1,fp8,fp8,0,0.4572213490804036
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,4,4,128,1,float16,fp8,0,0.27186665932337445
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,4,1,128,1,float16,float16,0,0.2515626748402913
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,4,1,128,1,float16,fp8,0,0.2529279987017314
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,4,1,128,1,fp8,fp8,0,0.24064000447591147
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,4,4,128,1,fp8,fp8,0,0.2590720057487488
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,4,2,128,1,float16,float16,0,0.25836799542109173
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,4,2,128,1,fp8,fp8,0,0.24883200724919638
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,3072,4,2,128,1,float16,fp8,0,0.25836267073949176
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,4,1,128,1,float16,float16,0,0.14079466462135315
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,4,4,128,1,float16,fp8,0,0.1570133368174235
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,4,1,128,1,float16,fp8,0,0.1443839967250824
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,4,4,128,1,fp8,fp8,0,0.1495039959748586
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,4,4,128,1,float16,float16,0,0.1551359991232554
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,4,1,128,1,fp8,fp8,0,0.13310933113098145
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,4,2,128,1,float16,float16,0,0.1455573340257009
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,4,2,128,1,float16,fp8,0,0.14659733573595682
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,4,4,128,1,float16,fp8,0,0.09454933802286784
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,3072,4,2,128,1,fp8,fp8,0,0.14011733730634054
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,4,4,128,1,float16,float16,0,0.09148266911506653
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,4,4,128,1,fp8,fp8,0,0.08943466345469157
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,4,1,128,1,float16,float16,0,0.09045333663622539
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,4,1,128,1,float16,fp8,0,0.09215999643007915
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,4,1,128,1,fp8,fp8,0,0.0846506655216217
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,4,2,128,1,float16,fp8,0,0.09250666697820027
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,4,4,128,1,float16,fp8,0,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,4,2,128,1,float16,float16,0,0.09147733449935913
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,4,4,128,1,float16,float16,0,0.06760533154010773
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,3072,4,2,128,1,fp8,fp8,0,0.08669867118199666
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,4,1,128,1,float16,float16,0,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,4,1,128,1,float16,fp8,0,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,4,1,128,1,fp8,fp8,0,0.0631466656923294
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,4,2,128,1,float16,float16,0,0.06620799998442332
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,4,4,128,1,float16,fp8,0,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,4,2,128,1,fp8,fp8,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,4,2,128,1,float16,fp8,0,0.06621333460013072
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,3072,4,4,128,1,fp8,fp8,0,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,4,4,128,1,float16,float16,0,0.061754668752352394
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,4,4,128,1,fp8,fp8,0,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,4,1,128,1,float16,float16,0,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,4,1,128,1,float16,fp8,0,0.06211733321348826
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,4,1,128,1,fp8,fp8,0,0.057999998331069946
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,4,2,128,1,float16,float16,0,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,4,2,128,1,float16,fp8,0,0.06177600224812826
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,3072,4,2,128,1,fp8,fp8,0,0.05870933334032694
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,4,1,128,1,float16,float16,0,0.5532960096995035
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,4,1,128,1,float16,fp8,0,0.5553493499755859
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,4,1,128,1,fp8,fp8,0,0.535210649172465
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,4,4,128,1,float16,float16,0,0.3060106635093689
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,4,2,128,1,float16,fp8,0,0.5635413328806559
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,4,2,128,1,float16,float16,0,0.5601280132929484
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,2048,4,2,128,1,fp8,fp8,0,0.5481813351313273
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,4,4,128,1,float16,fp8,0,0.31112533807754517
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,4,1,128,1,float16,float16,0,0.2855306665102641
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,4,1,128,1,float16,fp8,0,0.28518933057785034
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,4,1,128,1,fp8,fp8,0,0.2797173261642456
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,4,4,128,1,fp8,fp8,0,0.30293333530426025
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,4,2,128,1,float16,float16,0,0.2926986614863078
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,4,2,128,1,float16,fp8,0,0.2926986614863078
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,2048,4,2,128,1,fp8,fp8,0,0.28757333755493164
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,4,1,128,1,float16,float16,0,0.15069866180419922
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,4,4,128,1,float16,fp8,0,0.16947199900945029
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,4,4,128,1,fp8,fp8,0,0.16486400365829468
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,4,1,128,1,float16,fp8,0,0.15035733580589294
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,4,4,128,1,float16,float16,0,0.16810667514801025
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,4,1,128,1,fp8,fp8,0,0.1518933375676473
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,4,2,128,1,float16,float16,0,0.15904000401496887
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,4,2,128,1,float16,fp8,0,0.15873066584269205
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,4,4,128,1,float16,fp8,0,0.0942080020904541
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,4,4,128,1,float16,float16,0,0.09455466270446777
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,4,1,128,1,float16,fp8,0,0.09045333663622539
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,2048,4,2,128,1,fp8,fp8,0,0.15837867061297098
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,4,4,128,1,fp8,fp8,0,0.09489066402117412
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,4,1,128,1,float16,float16,0,0.09216533104578654
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,4,1,128,1,fp8,fp8,0,0.08567999800046285
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,4,2,128,1,float16,float16,0,0.09114133318265279
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,4,2,128,1,float16,fp8,0,0.09147733449935913
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,4,4,128,1,float16,float16,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,2048,4,2,128,1,fp8,fp8,0,0.08806399504343669
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,4,4,128,1,float16,fp8,0,0.0580320010582606
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,4,4,128,1,fp8,fp8,0,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,4,1,128,1,float16,fp8,0,0.05766933163007101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,4,1,128,1,fp8,fp8,0,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,4,2,128,1,float16,fp8,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,4,2,128,1,float16,float16,0,0.056320001681645714
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,4,2,128,1,fp8,fp8,0,0.05359466870625814
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,4,4,128,1,float16,float16,0,0.049498667319615684
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,4,4,128,1,float16,fp8,0,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,2048,4,1,128,1,float16,float16,0,0.055973331133524575
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,4,1,128,1,float16,fp8,0,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,4,4,128,1,fp8,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,4,1,128,1,float16,float16,0,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,4,1,128,1,fp8,fp8,0,0.047466665506362915
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,4,2,128,1,float16,float16,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,4,2,128,1,float16,fp8,0,0.05054933329423269
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,2048,4,2,128,1,fp8,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,4,4,128,1,float16,fp8,0,0.047450666626294456
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,4,1,128,1,float16,float16,0,0.04573333263397217
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,4,4,128,1,fp8,fp8,0,0.043354665239652
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,4,2,128,1,float16,fp8,0,0.04744000236193339
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,4,1,128,1,fp8,fp8,0,0.04369066655635834
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,4,2,128,1,float16,float16,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,4,2,128,1,fp8,fp8,0,0.043338666359583534
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,4,4,128,1,float16,float16,0,0.04747200012207031
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,2048,4,1,128,1,float16,fp8,0,0.047450666626294456
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,4,1,128,1,float16,float16,0,0.380079984664917
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,4,1,128,1,float16,fp8,0,0.3800746599833171
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,4,1,128,1,fp8,fp8,0,0.37837334473927814
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,4,2,128,1,float16,float16,0,0.38758401075998944
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,4,2,128,1,float16,fp8,0,0.38929065068562824
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1536,4,2,128,1,fp8,fp8,0,0.3872426748275757
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,4,4,128,1,float16,fp8,0,0.2208426594734192
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,4,4,128,1,float16,float16,0,0.22051199277242026
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,4,1,128,1,float16,float16,0,0.20188266038894653
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,4,1,128,1,float16,fp8,0,0.20241065820058188
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,4,4,128,1,fp8,fp8,0,0.22118399540583292
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,4,1,128,1,fp8,fp8,0,0.2027519941329956
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,4,2,128,1,float16,float16,0,0.20599999030431113
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,4,2,128,1,fp8,fp8,0,0.20923733711242676
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1536,4,2,128,1,float16,fp8,0,0.20684800545374551
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,4,4,128,1,float16,fp8,0,0.12322133779525757
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,4,1,128,1,float16,float16,0,0.11025066177050273
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,4,4,128,1,fp8,fp8,0,0.1256160040696462
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,4,1,128,1,float16,fp8,0,0.1109333336353302
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,4,4,128,1,float16,float16,0,0.12217600146929423
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,4,1,128,1,fp8,fp8,0,0.10922132929166158
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,4,2,128,1,float16,float16,0,0.11397866408030193
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,4,4,128,1,float16,float16,0,0.07235200206438701
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,4,2,128,1,float16,fp8,0,0.11639466881752014
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1536,4,2,128,1,fp8,fp8,0,0.11708266536394756
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,4,4,128,1,float16,fp8,0,0.07443733513355255
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,4,4,128,1,fp8,fp8,0,0.07166933516661327
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,4,1,128,1,float16,fp8,0,0.07031466563542683
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,4,1,128,1,float16,float16,0,0.06861333549022675
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,4,1,128,1,fp8,fp8,0,0.06621866424878438
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,4,2,128,1,float16,float16,0,0.07099733253320058
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,4,4,128,1,float16,float16,0,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,4,2,128,1,float16,fp8,0,0.07168533404668172
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1536,4,2,128,1,fp8,fp8,0,0.06826133529345195
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,4,4,128,1,float16,fp8,0,0.04744000236193339
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,4,4,128,1,fp8,fp8,0,0.04779199759165446
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,4,2,128,1,float16,float16,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,4,1,128,1,fp8,fp8,0,0.04710933566093445
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,4,4,128,1,float16,float16,0,0.041984001795450844
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,4,1,128,1,float16,fp8,0,0.04948266843954722
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,4,2,128,1,fp8,fp8,0,0.04574400186538696
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,4,2,128,1,float16,fp8,0,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,4,4,128,1,float16,fp8,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1536,4,1,128,1,float16,float16,0,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,4,4,128,1,fp8,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,4,1,128,1,float16,fp8,0,0.04164266586303711
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,4,1,128,1,float16,float16,0,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,4,2,128,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,4,2,128,1,float16,fp8,0,0.041306667029857635
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,4,2,128,1,float16,float16,0,0.04301866888999939
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1536,4,1,128,1,fp8,fp8,0,0.041296000281969704
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,4,4,128,1,float16,fp8,0,0.03924266745646795
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,4,4,128,1,fp8,fp8,0,0.03721066564321518
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,4,1,128,1,float16,float16,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,4,1,128,1,float16,fp8,0,0.039605334401130676
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,4,1,128,1,fp8,fp8,0,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,4,2,128,1,fp8,fp8,0,0.03718933214743932
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,4,2,128,1,float16,float16,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,4,4,128,1,float16,float16,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1536,4,2,128,1,float16,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,4,1,128,1,float16,float16,0,0.4130133390426636
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,4,1,128,1,float16,fp8,0,0.4092586835225423
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,4,1,128,1,fp8,fp8,0,0.42718398571014404
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,4,4,128,1,float16,fp8,0,0.22459733486175537
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,4,2,128,1,float16,float16,0,0.420357346534729
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,4,4,128,1,float16,float16,0,0.2307413419087728
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,4,2,128,1,float16,fp8,0,0.4140586853027344
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,1024,4,2,128,1,fp8,fp8,0,0.4363946517308553
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,4,1,128,1,float16,float16,0,0.21470399697621664
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,4,1,128,1,float16,fp8,0,0.212991992632548
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,4,1,128,1,fp8,fp8,0,0.2228906750679016
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,4,4,128,1,fp8,fp8,0,0.23859200874964395
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,4,2,128,1,float16,float16,0,0.21980800231297812
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,4,4,128,1,float16,float16,0,0.1269813378651937
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,4,2,128,1,fp8,fp8,0,0.2283359964688619
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,4,4,128,1,float16,fp8,0,0.12527466813723245
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,4,1,128,1,float16,float16,0,0.11639466881752014
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,1024,4,2,128,1,float16,fp8,0,0.21947733561197916
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,4,4,128,1,fp8,fp8,0,0.12902933359146118
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,4,1,128,1,float16,fp8,0,0.1153706709543864
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,4,1,128,1,fp8,fp8,0,0.11979732910792033
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,4,2,128,1,float16,float16,0,0.11776000261306763
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,4,4,128,1,float16,float16,0,0.07236266632874806
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,4,1,128,1,float16,float16,0,0.06962666908899943
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,4,2,128,1,float16,fp8,0,0.11844266454378764
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,4,1,128,1,float16,fp8,0,0.06861866513888042
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,1024,4,2,128,1,fp8,fp8,0,0.12356266379356384
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,4,4,128,1,fp8,fp8,0,0.07374399900436401
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,4,1,128,1,fp8,fp8,0,0.068271999557813
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,4,4,128,1,float16,fp8,0,0.07270933190981548
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,4,2,128,1,float16,float16,0,0.06997333467006683
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,4,1,128,1,float16,float16,0,0.040965333580970764
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,4,4,128,1,float16,float16,0,0.041637333730856575
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,4,4,128,1,float16,fp8,0,0.04299733539422353
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,4,2,128,1,float16,fp8,0,0.06929066777229309
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,1024,4,2,128,1,fp8,fp8,0,0.06997333467006683
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,4,4,128,1,fp8,fp8,0,0.04332800209522247
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,4,2,128,1,float16,float16,0,0.041296000281969704
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,4,1,128,1,fp8,fp8,0,0.041306667029857635
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,4,2,128,1,float16,fp8,0,0.04164266586303711
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,4,1,128,1,float16,fp8,0,0.04334400097529093
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,1024,4,2,128,1,fp8,fp8,0,0.04095999896526337
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,4,4,128,1,float16,fp8,0,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,4,4,128,1,float16,float16,0,0.03448000053564707
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,4,4,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,4,1,128,1,float16,float16,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,4,1,128,1,fp8,fp8,0,0.03311999887228012
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,4,2,128,1,float16,fp8,0,0.03481066723664602
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,4,2,128,1,fp8,fp8,0,0.033717334270477295
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,4,4,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,4,1,128,1,float16,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,4,4,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,4,1,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,4,1,128,1,float16,fp8,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,4,4,128,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,1024,4,2,128,1,float16,float16,0,0.03345600018898646
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,4,1,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,4,2,128,1,float16,float16,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,4,2,128,1,fp8,fp8,0,0.029359998802344005
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,1024,4,2,128,1,float16,fp8,0,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,4,4,128,1,float16,float16,0,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,4,1,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,4,1,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,4,2,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,4,2,128,1,float16,fp8,0,0.029359998802344005
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,4,4,128,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,4,4,128,1,fp8,fp8,0,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,4,2,128,1,fp8,fp8,0,0.029018667836983997
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,1024,4,1,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,4,1,128,1,float16,fp8,0,0.36061867078145343
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,4,1,128,1,float16,float16,0,0.3630026578903198
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,4,1,128,1,fp8,fp8,0,0.3875519831975301
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,4,4,128,1,float16,float16,0,0.20394132534662882
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,4,4,128,1,float16,fp8,0,0.20122132698694864
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,4,2,128,1,float16,float16,0,0.36983466148376465
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,4,2,128,1,float16,fp8,0,0.36744534969329834
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,512,4,2,128,1,fp8,fp8,0,0.3973120053609212
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,4,1,128,1,float16,float16,0,0.18875734011332193
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,4,1,128,1,float16,fp8,0,0.1880693236986796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,4,1,128,1,fp8,fp8,0,0.2039573391278585
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,4,4,128,1,fp8,fp8,0,0.21742933988571167
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,4,4,128,1,float16,float16,0,0.11229333281517029
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,4,2,128,1,float16,fp8,0,0.19046932458877563
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,4,2,128,1,fp8,fp8,0,0.20770132541656494
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,512,4,2,128,1,float16,float16,0,0.19524266322453818
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,4,1,128,1,float16,float16,0,0.10342400272687276
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,4,4,128,1,float16,fp8,0,0.11126933495203654
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,4,4,128,1,fp8,fp8,0,0.11845866839090984
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,4,1,128,1,float16,fp8,0,0.10172800223032634
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,4,1,128,1,fp8,fp8,0,0.10683733224868774
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,4,2,128,1,float16,float16,0,0.10513599713643391
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,4,2,128,1,float16,fp8,0,0.10478933652242024
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,4,4,128,1,float16,float16,0,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,4,4,128,1,float16,fp8,0,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,512,4,2,128,1,fp8,fp8,0,0.11537599563598633
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,4,1,128,1,float16,float16,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,4,4,128,1,fp8,fp8,0,0.06588266789913177
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,4,1,128,1,float16,fp8,0,0.058373332023620605
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,4,1,128,1,fp8,fp8,0,0.06075733403364817
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,4,2,128,1,float16,float16,0,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,4,2,128,1,float16,fp8,0,0.05836800237496694
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,4,1,128,1,float16,fp8,0,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,4,4,128,1,fp8,fp8,0,0.03753600021203359
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,4,1,128,1,float16,float16,0,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,4,1,128,1,fp8,fp8,0,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,4,4,128,1,float16,fp8,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,512,4,2,128,1,fp8,fp8,0,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,4,2,128,1,float16,float16,0,0.036506667733192444
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,4,4,128,1,float16,float16,0,0.03822933385769526
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,4,2,128,1,float16,fp8,0,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,4,4,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,512,4,2,128,1,fp8,fp8,0,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,4,4,128,1,float16,float16,0,0.029002666473388672
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,4,1,128,1,float16,float16,0,0.029018667836983997
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,4,1,128,1,float16,fp8,0,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,4,4,128,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,4,2,128,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,4,2,128,1,fp8,fp8,0,0.028981332977612812
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,4,4,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,4,2,128,1,float16,fp8,0,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,4,1,128,1,float16,float16,0,0.02492800106604894
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,4,4,128,1,float16,float16,0,0.02489600082238515
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,512,4,1,128,1,fp8,fp8,0,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,4,1,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,4,2,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,4,2,128,1,fp8,fp8,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,4,1,128,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,4,2,128,1,float16,fp8,0,0.025253333151340485
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,4,4,128,1,float16,float16,0,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,512,4,4,128,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,4,4,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,4,4,128,1,float16,fp8,0,0.023215999205907185
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,4,1,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,4,1,128,1,float16,fp8,0,0.024906667570273083
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,4,1,128,1,fp8,fp8,0,0.024234667420387268
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,4,2,128,1,float16,fp8,0,0.023200000325838726
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,4,2,128,1,float16,float16,0,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,4,4,128,1,float16,float16,0,0.023893333971500397
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,4,4,128,1,fp8,fp8,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,4,1,128,1,float16,float16,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,4,4,128,1,float16,fp8,0,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,4,1,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,4,2,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,4,2,128,1,float16,fp8,0,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,4,1,128,1,float16,fp8,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,512,4,2,128,1,float16,float16,0,0.022885332504908245
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,512,4,2,128,1,fp8,fp8,0,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,4,1,128,1,fp8,fp8,0,0.15717867016792297
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,4,1,128,1,float16,fp8,0,0.1423306663831075
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,4,1,128,1,float16,float16,0,0.14762666821479797
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,4,4,128,1,float16,float16,0,0.09762133161226909
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,4,2,128,1,fp8,fp8,0,0.1597493290901184
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,4,4,128,1,float16,fp8,0,0.09046933054924011
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,4,2,128,1,float16,fp8,0,0.15123732884724936
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,256,4,2,128,1,float16,float16,0,0.15411200126012167
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,4,1,128,1,float16,fp8,0,0.08123733103275299
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,4,1,128,1,fp8,fp8,0,0.0846506655216217
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,4,4,128,1,float16,float16,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,4,4,128,1,fp8,fp8,0,0.09692800045013428
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,4,2,128,1,float16,float16,0,0.0846506655216217
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,4,1,128,1,float16,float16,0,0.08157333234945933
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,4,2,128,1,float16,fp8,0,0.08194666604201
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,256,4,2,128,1,fp8,fp8,0,0.08771733442942302
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,4,1,128,1,float16,float16,0,0.04948799808820089
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,4,1,128,1,float16,fp8,0,0.047450666626294456
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,4,1,128,1,fp8,fp8,0,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,4,4,128,1,fp8,fp8,0,0.05461333195368449
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,4,4,128,1,float16,fp8,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,4,2,128,1,float16,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,4,2,128,1,fp8,fp8,0,0.05018133421738943
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,4,4,128,1,float16,float16,0,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,256,4,2,128,1,float16,float16,0,0.04776533444722494
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,4,4,128,1,fp8,fp8,0,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,4,4,128,1,float16,fp8,0,0.03139200061559677
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,4,1,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,4,1,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,4,1,128,1,fp8,fp8,0,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,4,2,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,4,2,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,4,4,128,1,float16,float16,0,0.02697066714366277
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,4,1,128,1,float16,float16,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,4,4,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,4,4,128,1,fp8,fp8,0,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,4,1,128,1,float16,fp8,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,4,2,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,4,1,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,256,4,2,128,1,fp8,fp8,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,4,2,128,1,float16,fp8,0,0.025605333348115284
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,256,4,2,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,4,4,128,1,float16,float16,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,4,4,128,1,float16,fp8,0,0.021488000949223835
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,4,1,128,1,float16,float16,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,4,4,128,1,fp8,fp8,0,0.022848000129063923
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,4,1,128,1,float16,fp8,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,4,2,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,4,2,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,4,2,128,1,fp8,fp8,0,0.020810666183630627
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,4,4,128,1,float16,float16,0,0.02080533280968666
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,4,4,128,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,4,4,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,4,1,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,256,4,1,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,4,1,128,1,float16,fp8,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,4,2,128,1,fp8,fp8,0,0.020138667275508244
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,4,2,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,4,1,128,1,fp8,fp8,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,256,4,2,128,1,float16,fp8,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,4,4,128,1,float16,float16,0,0.019120000302791595
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,4,4,128,1,float16,fp8,0,0.020831999679406483
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,4,1,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,4,1,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,4,1,128,1,fp8,fp8,0,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,4,2,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,4,2,128,1,float16,float16,0,0.019797333826621372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,4,2,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,4,4,128,1,float16,float16,0,0.02080533280968666
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,4,4,128,1,float16,fp8,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,256,4,4,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,4,1,128,1,float16,float16,0,0.019120000302791595
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,4,1,128,1,float16,fp8,0,0.02083733429511388
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,4,4,128,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,4,1,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,4,2,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,4,2,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,256,4,2,128,1,fp8,fp8,0,0.019802667200565338
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,4,1,128,1,fp8,fp8,0,0.08840533097585042
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,4,1,128,1,float16,fp8,0,0.08087466657161713
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,4,2,128,1,float16,fp8,0,0.08672533432642619
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,4,2,128,1,float16,float16,0,0.08533333738644917
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,4,4,128,1,float16,fp8,0,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,4,4,128,1,float16,float16,0,0.053247998158137
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,4,1,128,1,float16,float16,0,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,128,4,2,128,1,fp8,fp8,0,0.09282132983207703
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,4,1,128,1,float16,fp8,0,0.0481279989083608
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,4,1,128,1,float16,float16,0,0.049173335234324135
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,4,4,128,1,fp8,fp8,0,0.05598400036493937
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,4,4,128,1,float16,float16,0,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,4,2,128,1,float16,float16,0,0.04779199759165446
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,4,2,128,1,float16,fp8,0,0.04948266843954722
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,4,4,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,4,1,128,1,float16,float16,0,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,4,1,128,1,float16,fp8,0,0.03107200066248576
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,4,1,128,1,fp8,fp8,0,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,4,4,128,1,fp8,fp8,0,0.034815999368826546
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,128,4,2,128,1,fp8,fp8,0,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,4,1,128,1,fp8,fp8,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,4,2,128,1,float16,fp8,0,0.031386665999889374
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,4,2,128,1,fp8,fp8,0,0.03137599925200144
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,128,4,2,128,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,4,4,128,1,float16,float16,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,4,4,128,1,fp8,fp8,0,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,4,1,128,1,float16,fp8,0,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,4,1,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,4,2,128,1,float16,float16,0,0.022848000129063923
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,4,4,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,4,2,128,1,fp8,fp8,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,4,4,128,1,float16,float16,0,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,4,2,128,1,float16,fp8,0,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,4,4,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,128,4,1,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,4,4,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,4,1,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,4,1,128,1,fp8,fp8,0,0.01878400022784869
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,4,2,128,1,float16,float16,0,0.018789333601792652
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,4,2,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,4,2,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,128,4,1,128,1,float16,fp8,0,0.018789333601792652
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,4,4,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,4,1,128,1,float16,float16,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,4,4,128,1,fp8,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,4,1,128,1,float16,fp8,0,0.01669866715868314
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,4,1,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,4,2,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,4,4,128,1,float16,float16,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,4,2,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,4,4,128,1,float16,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,128,4,2,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,4,4,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,4,4,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,4,1,128,1,float16,float16,0,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,4,1,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,4,2,128,1,float16,float16,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,4,2,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,4,4,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,4,4,128,1,float16,fp8,0,0.016762666404247284
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,4,4,128,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,4,2,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,4,1,128,1,float16,float16,0,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,4,1,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,128,4,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,4,1,128,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,4,2,128,1,float16,float16,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,4,2,128,1,float16,fp8,0,0.01669866715868314
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,128,4,2,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,4,4,128,1,float16,float16,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,4,4,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,4,4,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,4,1,128,1,float16,fp8,0,0.018432000031073887
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,4,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,4,2,128,1,float16,float16,0,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,4,2,128,1,float16,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,4,2,128,1,fp8,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,128,4,1,128,1,float16,float16,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,4,1,128,1,float16,float16,0,0.05973866581916809
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,4,1,128,1,float16,fp8,0,0.06005866825580597
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,4,2,128,1,float16,fp8,0,0.06075199941794077
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,4,2,128,1,float16,float16,0,0.06075733403364817
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,4,2,128,1,fp8,fp8,0,0.06588266789913177
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,4,4,128,1,float16,float16,0,0.03961066653331121
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,64,4,1,128,1,fp8,fp8,0,0.06485333542029063
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,4,1,128,1,float16,float16,0,0.038917332887649536
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,4,4,128,1,fp8,fp8,0,0.041984001795450844
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,4,1,128,1,float16,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,4,1,128,1,fp8,fp8,0,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,4,2,128,1,float16,float16,0,0.039247999588648476
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,4,4,128,1,float16,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,4,4,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,4,4,128,1,fp8,fp8,0,0.027647999425729115
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,4,1,128,1,float16,float16,0,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,4,2,128,1,float16,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,4,1,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,4,4,128,1,float16,fp8,0,0.026954665780067444
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,64,4,2,128,1,fp8,fp8,0,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,4,2,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,4,2,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,4,2,128,1,fp8,fp8,0,0.025946666797002155
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,4,4,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,4,4,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,4,4,128,1,fp8,fp8,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,4,1,128,1,float16,fp8,0,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,64,4,1,128,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,4,1,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,4,1,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,4,2,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,4,2,128,1,float16,fp8,0,0.01876266673207283
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,64,4,2,128,1,fp8,fp8,0,0.0194560003777345
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,4,4,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,4,1,128,1,float16,float16,0,0.01740266631046931
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,4,1,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,4,1,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,4,2,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,4,2,128,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,4,4,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,4,2,128,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,4,4,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,4,4,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,4,4,128,1,float16,fp8,0,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,64,4,4,128,1,fp8,fp8,0,0.017360000560681026
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,4,1,128,1,float16,float16,0,0.016714667280515034
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,4,1,128,1,float16,fp8,0,0.01670933390657107
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,4,1,128,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,4,2,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,4,2,128,1,float16,float16,0,0.016714667280515034
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,64,4,2,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,4,4,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,4,1,128,1,float16,float16,0,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,4,1,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,4,4,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,4,1,128,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,4,2,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,4,2,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,4,2,128,1,fp8,fp8,0,0.016751999656359356
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,4,4,128,1,float16,float16,0,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,64,4,4,128,1,float16,fp8,0,0.016048000504573185
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,4,4,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,4,1,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,4,1,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,4,1,128,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,4,2,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,4,4,128,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,4,2,128,1,float16,fp8,0,0.016384000579516094
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,4,4,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,4,4,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,4,4,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,4,1,128,1,float16,float16,0,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,4,1,128,1,float16,fp8,0,0.015029333531856537
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,4,2,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,4,2,128,1,float16,fp8,0,0.016741332908471424
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,4,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,64,4,2,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,64,4,2,128,1,fp8,fp8,0,0.016037333756685257
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,4,1,128,1,float16,float16,0,0.05154666801293691
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,4,1,128,1,float16,fp8,0,0.05119466781616211
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,4,1,128,1,fp8,fp8,0,0.053583999474843345
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,4,2,128,1,float16,float16,0,0.052895997961362205
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,4,4,128,1,float16,float16,0,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,4,2,128,1,float16,fp8,0,0.051551997661590576
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,32,4,2,128,1,fp8,fp8,0,0.053930665055910744
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,4,4,128,1,fp8,fp8,0,0.03514133393764496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,4,1,128,1,float16,float16,0,0.031744000812371574
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,4,1,128,1,float16,fp8,0,0.031744000812371574
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,4,1,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,4,2,128,1,float16,float16,0,0.031744000812371574
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,4,4,128,1,float16,fp8,0,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,4,2,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,4,4,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,4,4,128,1,fp8,fp8,0,0.023215999205907185
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,32,4,2,128,1,fp8,fp8,0,0.033439998825391136
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,4,1,128,1,float16,float16,0,0.02184533327817917
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,4,1,128,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,4,4,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,4,2,128,1,float16,float16,0,0.022853332261244457
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,4,2,128,1,float16,fp8,0,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,4,2,128,1,fp8,fp8,0,0.023541333774725597
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,4,4,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,4,4,128,1,float16,float16,0,0.01878400022784869
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,4,4,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,32,4,1,128,1,fp8,fp8,0,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,4,1,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,4,2,128,1,float16,float16,0,0.018085333208243053
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,4,1,128,1,fp8,fp8,0,0.01878400022784869
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,4,2,128,1,float16,fp8,0,0.01878400022784869
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,4,1,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,4,4,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,32,4,2,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,4,1,128,1,float16,float16,0,0.016037333756685257
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,4,4,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,4,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,4,1,128,1,float16,fp8,0,0.01674666628241539
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,4,2,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,4,2,128,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,4,4,128,1,float16,float16,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,4,4,128,1,fp8,fp8,0,0.016741332908471424
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,4,1,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,4,1,128,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,4,4,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,4,1,128,1,float16,float16,0,0.016042667130629223
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,4,4,128,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,32,4,2,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,4,2,128,1,float16,float16,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,4,2,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,32,4,2,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,4,4,128,1,float16,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,4,4,128,1,float16,float16,0,0.01570133368174235
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,4,1,128,1,fp8,fp8,0,0.016714667280515034
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,4,2,128,1,float16,float16,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,4,2,128,1,fp8,fp8,0,0.016714667280515034
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,4,2,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,4,4,128,1,float16,float16,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,4,1,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,4,4,128,1,float16,fp8,0,0.01670933390657107
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,4,1,128,1,float16,fp8,0,0.015354666858911514
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,32,4,4,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,4,1,128,1,float16,float16,0,0.015002666662136713
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,4,4,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,4,1,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,4,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,4,2,128,1,float16,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,4,4,128,1,float16,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,4,4,128,1,fp8,fp8,0,0.016751999656359356
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,4,1,128,1,float16,float16,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,4,2,128,1,fp8,fp8,0,0.015354666858911514
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,4,1,128,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,4,1,128,1,float16,fp8,0,0.016741332908471424
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,4,4,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,32,4,2,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,4,2,128,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,4,2,128,1,float16,float16,0,0.01498666654030482
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,32,4,2,128,1,fp8,fp8,0,0.016751999656359356
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,4,1,128,1,float16,fp8,0,0.04539200166861216
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,4,1,128,1,fp8,fp8,0,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,4,1,128,1,float16,float16,0,0.04643733302752177
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,4,4,128,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,4,2,128,1,fp8,fp8,0,0.049498667319615684
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,4,4,128,1,float16,fp8,0,0.029338667790095013
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,4,2,128,1,float16,float16,0,0.04744000236193339
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,4,4,128,1,fp8,fp8,0,0.03173866619666418
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,4,1,128,1,float16,float16,0,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,256,16,4,2,128,1,float16,fp8,0,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,4,1,128,1,fp8,fp8,0,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,4,2,128,1,float16,float16,0,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,4,2,128,1,float16,fp8,0,0.03035199890534083
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,4,1,128,1,float16,fp8,0,0.030720000465710957
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,4,4,128,1,float16,float16,0,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,128,16,4,2,128,1,fp8,fp8,0,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,4,1,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,4,4,128,1,float16,fp8,0,0.021168000996112823
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,4,1,128,1,float16,fp8,0,0.021151999632517498
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,4,1,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,4,2,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,4,2,128,1,float16,float16,0,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,4,4,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,4,4,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,4,4,128,1,fp8,fp8,0,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,4,1,128,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,4,1,128,1,float16,float16,0,0.01670933390657107
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,4,4,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,64,16,4,2,128,1,fp8,fp8,0,0.022533332308133442
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,4,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,4,2,128,1,float16,float16,0,0.017071999609470367
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,4,2,128,1,float16,fp8,0,0.017749333133300144
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,4,4,128,1,float16,float16,0,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,4,4,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,4,1,128,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,4,1,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,4,1,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,4,2,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,4,2,128,1,float16,fp8,0,0.01670933390657107
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,4,4,128,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,32,16,4,2,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,4,4,128,1,float16,float16,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,16,16,4,2,128,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,4,4,128,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,4,1,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,4,1,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,4,4,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,4,2,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,4,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,4,2,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,4,4,128,1,float16,float16,0,0.016751999656359356
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,4,4,128,1,float16,fp8,0,0.016714667280515034
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,4,4,128,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,4,1,128,1,float16,float16,0,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,4,1,128,1,float16,fp8,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,8,16,4,2,128,1,float16,float16,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,4,2,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,4,2,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,4,2,128,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,4,4,128,1,float16,float16,0,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,4,16,4,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,4,4,128,1,float16,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,4,4,128,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,4,1,128,1,float16,fp8,0,0.016751999656359356
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,4,2,128,1,float16,float16,0,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,4,1,128,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,4,2,128,1,float16,fp8,0,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,4,4,128,1,float16,fp8,0,0.014997333288192749
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,4,4,128,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,4,2,128,1,fp8,fp8,0,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,4,4,128,1,fp8,fp8,0,0.016704000532627106
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,4,1,128,1,float16,float16,0,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,4,1,128,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,2,16,4,1,128,1,float16,float16,0,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,4,1,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,4,2,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,4,2,128,1,float16,float16,0,0.014655999839305878
TRTLLM,1.0.0rc6,NVIDIA B200,context_attention,torch_flow,1,16,4,2,128,1,fp8,fp8,0,0.016730666160583496
