framework,version,device,op_name,kernel_source,batch_size,isl,num_heads,num_key_value_heads,head_dim,beam_width,attn_dtype,kv_cache_dtype,step,latency
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,64,1,128,1,float16,float16,0,62.52078755696615
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,64,2,128,1,float16,fp8,0,55.56855265299479
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,64,8,128,1,float16,float16,0,61.649383544921875
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,64,4,128,1,float16,float16,0,64.01852416992188
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,64,1,128,1,float16,fp8,0,55.5015614827474
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,64,2,128,1,float16,float16,0,63.99754333496094
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,64,8,128,1,float16,fp8,0,56.49401346842448
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,64,64,128,1,float16,float16,0,33.353291829427086
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,64,64,128,1,float16,fp8,0,30.712420145670574
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,64,4,128,1,float16,fp8,0,55.907877604166664
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,64,1,128,1,float16,float16,0,32.20153045654297
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,64,1,128,1,float16,fp8,0,28.01458231608073
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,64,2,128,1,float16,float16,0,34.225626627604164
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,64,2,128,1,float16,fp8,0,28.185994466145832
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,64,64,128,1,float16,float16,0,16.480010986328125
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,64,4,128,1,float16,fp8,0,28.438331604003906
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,64,64,128,1,float16,fp8,0,14.762533823649088
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,64,4,128,1,float16,float16,0,34.008593241373696
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,64,8,128,1,float16,float16,0,31.014597574869793
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,64,1,128,1,float16,float16,0,15.837652842203775
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,64,8,128,1,float16,fp8,0,28.492207845052082
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,64,1,128,1,float16,fp8,0,14.028128306070963
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,64,2,128,1,float16,fp8,0,14.111178080240885
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,64,2,128,1,float16,float16,0,15.841359456380209
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,64,4,128,1,float16,fp8,0,14.152159372965494
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,64,4,128,1,float16,float16,0,16.17193603515625
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,64,64,128,1,float16,fp8,0,7.417162577311198
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,64,64,128,1,float16,float16,0,7.673231760660808
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,64,8,128,1,float16,float16,0,15.929061889648438
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,64,1,128,1,float16,float16,0,7.720101038614909
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,64,1,128,1,float16,fp8,0,7.168170928955078
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,64,2,128,1,float16,float16,0,7.133871714274089
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,64,8,128,1,float16,fp8,0,14.286640167236328
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,64,2,128,1,float16,fp8,0,7.098880132039388
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,64,4,128,1,float16,float16,0,7.940581639607747
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,64,4,128,1,float16,fp8,0,7.301445643107097
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,64,8,128,1,float16,float16,0,7.791877110799153
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,64,8,128,1,float16,fp8,0,7.439178466796875
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,64,1,128,1,float16,fp8,0,31.75262451171875
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,64,2,128,1,float16,fp8,0,31.85796356201172
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,64,1,128,1,float16,float16,0,36.73510996500651
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,64,2,128,1,float16,float16,0,36.10458119710287
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,64,4,128,1,float16,fp8,0,32.14543914794922
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,64,4,128,1,float16,float16,0,37.43374379475912
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,64,8,128,1,float16,fp8,0,32.50853983561198
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,64,8,128,1,float16,float16,0,36.46920522054037
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,64,64,128,1,float16,fp8,0,17.06011199951172
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,64,64,128,1,float16,float16,0,19.177754720052082
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,64,1,128,1,float16,fp8,0,15.941023508707682
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,64,1,128,1,float16,float16,0,17.89246368408203
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,64,2,128,1,float16,float16,0,17.827925364176433
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,64,2,128,1,float16,fp8,0,16.078266143798828
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,64,4,128,1,float16,float16,0,18.164160410563152
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,64,4,128,1,float16,fp8,0,16.237263997395832
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,64,64,128,1,float16,float16,0,9.696309407552084
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,64,64,128,1,float16,fp8,0,8.649989446004232
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,64,1,128,1,float16,fp8,0,8.00104014078776
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,64,1,128,1,float16,float16,0,8.70738665262858
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,64,2,128,1,float16,float16,0,8.804666519165039
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,64,2,128,1,float16,fp8,0,8.039728164672852
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,64,8,128,1,float16,float16,0,18.116811116536457
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,64,8,128,1,float16,fp8,0,16.342304229736328
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,64,4,128,1,float16,fp8,0,8.189530690511068
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,64,4,128,1,float16,float16,0,9.245285034179688
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,64,64,128,1,float16,float16,0,4.399231910705566
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,64,64,128,1,float16,fp8,0,4.339834531148274
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,64,8,128,1,float16,float16,0,8.960896174112955
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,64,8,128,1,float16,fp8,0,8.379456202189127
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,64,1,128,1,float16,float16,0,4.099856058756511
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,64,1,128,1,float16,fp8,0,4.071375846862793
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,64,2,128,1,float16,float16,0,4.207807858784993
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,64,2,128,1,float16,fp8,0,4.241680145263672
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,64,4,128,1,float16,float16,0,4.232069333394368
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,64,4,128,1,float16,fp8,0,4.112485249837239
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,64,8,128,1,float16,float16,0,4.279637336730957
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,64,8,128,1,float16,fp8,0,4.1623945236206055
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,64,1,128,1,float16,fp8,0,22.239290873209637
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,64,1,128,1,float16,float16,0,25.414873758951824
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,64,2,128,1,float16,fp8,0,22.38714599609375
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,64,2,128,1,float16,float16,0,25.233131408691406
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,64,8,128,1,float16,fp8,0,22.83306121826172
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,64,4,128,1,float16,fp8,0,22.71661885579427
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,64,4,128,1,float16,float16,0,25.768592834472656
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,64,8,128,1,float16,float16,0,25.376080830891926
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,64,64,128,1,float16,float16,0,13.554939270019531
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,64,1,128,1,float16,float16,0,12.620010375976562
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,64,1,128,1,float16,fp8,0,11.241322835286459
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,64,64,128,1,float16,fp8,0,12.209791819254557
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,64,2,128,1,float16,fp8,0,11.204442342122396
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,64,2,128,1,float16,float16,0,12.676453908284506
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,64,4,128,1,float16,float16,0,12.706287384033203
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,64,4,128,1,float16,fp8,0,11.644911448160807
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,64,64,128,1,float16,float16,0,6.455466588338216
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,64,64,128,1,float16,fp8,0,6.137429555257161
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,64,1,128,1,float16,float16,0,6.032426834106445
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,64,1,128,1,float16,fp8,0,5.841205596923828
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,64,2,128,1,float16,float16,0,6.116373062133789
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,64,2,128,1,float16,fp8,0,5.682837168375651
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,64,8,128,1,float16,float16,0,12.686283111572266
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,64,8,128,1,float16,fp8,0,11.513258616129557
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,64,64,128,1,float16,float16,0,3.1069065729777017
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,64,64,128,1,float16,fp8,0,3.1181227366129556
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,64,4,128,1,float16,float16,0,6.074352264404297
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,64,4,128,1,float16,fp8,0,5.810261408487956
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,64,8,128,1,float16,float16,0,6.018288294474284
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,64,8,128,1,float16,fp8,0,5.80738639831543
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,64,1,128,1,float16,float16,0,2.947349230448405
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,64,2,128,1,float16,float16,0,2.8580106099446616
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,64,1,128,1,float16,fp8,0,2.9808371861775718
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,64,2,128,1,float16,fp8,0,2.9356533686319985
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,64,4,128,1,float16,float16,0,2.907909393310547
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,64,8,128,1,float16,float16,0,2.797504107157389
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,64,4,128,1,float16,fp8,0,3.320021311442057
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,64,8,128,1,float16,fp8,0,3.1453119913736978
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,64,1,128,1,float16,fp8,0,29.12378184000651
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,64,2,128,1,float16,fp8,0,29.29969533284505
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,64,1,128,1,float16,float16,0,32.68805948893229
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,64,4,128,1,float16,fp8,0,29.951009114583332
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,64,2,128,1,float16,float16,0,32.72954559326172
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,64,4,128,1,float16,float16,0,33.102378845214844
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,64,8,128,1,float16,float16,0,32.70516713460287
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,64,8,128,1,float16,fp8,0,29.742218017578125
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,64,64,128,1,float16,float16,0,17.550655364990234
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,64,1,128,1,float16,float16,0,16.390064239501953
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,64,1,128,1,float16,fp8,0,14.549818674723307
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,64,64,128,1,float16,fp8,0,16.13750457763672
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,64,2,128,1,float16,fp8,0,14.699296315511068
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,64,2,128,1,float16,float16,0,16.36080042521159
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,64,4,128,1,float16,float16,0,16.632544199625652
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,64,4,128,1,float16,fp8,0,14.948331197102865
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,64,64,128,1,float16,float16,0,8.801061630249023
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,64,64,128,1,float16,fp8,0,8.110464096069336
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,64,1,128,1,float16,float16,0,7.958159764607747
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,64,1,128,1,float16,fp8,0,7.386186599731445
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,64,2,128,1,float16,float16,0,7.7164052327473955
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,64,2,128,1,float16,fp8,0,7.333221435546875
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,64,8,128,1,float16,float16,0,16.368724822998047
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,64,8,128,1,float16,fp8,0,15.06177012125651
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,64,64,128,1,float16,float16,0,4.391189257303874
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,64,64,128,1,float16,fp8,0,4.068832079569499
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,64,4,128,1,float16,fp8,0,7.574272155761719
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,64,4,128,1,float16,float16,0,8.051685333251953
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,64,8,128,1,float16,fp8,0,7.579919815063477
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,64,8,128,1,float16,float16,0,8.055706659952799
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,64,1,128,1,float16,fp8,0,3.7084693908691406
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,64,1,128,1,float16,float16,0,3.7587254842122397
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,64,2,128,1,float16,fp8,0,3.697829246520996
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,64,2,128,1,float16,float16,0,3.703488032023112
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,64,4,128,1,float16,float16,0,3.863978703816732
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,64,4,128,1,float16,fp8,0,4.134544054667155
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,64,64,128,1,float16,float16,0,2.025178591410319
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,64,64,128,1,float16,fp8,0,2.192837397257487
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,64,8,128,1,float16,float16,0,3.6726452509562173
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,64,8,128,1,float16,fp8,0,4.077871958414714
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,64,1,128,1,float16,float16,0,1.8549866676330566
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,64,1,128,1,float16,fp8,0,2.030277411142985
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,64,2,128,1,float16,float16,0,1.913493315378825
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,64,2,128,1,float16,fp8,0,1.9192959467569988
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,64,4,128,1,float16,float16,0,1.912325382232666
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,64,4,128,1,float16,fp8,0,1.982965310414632
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,64,8,128,1,float16,float16,0,1.8773919741312664
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,64,8,128,1,float16,fp8,0,1.9643252690633137
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,64,1,128,1,float16,float16,0,18.99732208251953
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,64,1,128,1,float16,fp8,0,16.96133295694987
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,64,2,128,1,float16,fp8,0,17.11195246378581
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,64,2,128,1,float16,float16,0,19.246949513753254
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,64,4,128,1,float16,float16,0,19.372650146484375
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,64,8,128,1,float16,float16,0,18.832271575927734
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,64,4,128,1,float16,fp8,0,17.689236958821613
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,64,8,128,1,float16,fp8,0,17.53331247965495
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,64,1,128,1,float16,float16,0,9.360624313354492
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,64,1,128,1,float16,fp8,0,8.774314880371094
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,64,64,128,1,float16,fp8,0,9.64914639790853
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,64,2,128,1,float16,fp8,0,8.627093633015951
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,64,64,128,1,float16,float16,0,10.228282928466797
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,64,2,128,1,float16,float16,0,9.36515744527181
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,64,4,128,1,float16,float16,0,9.57857577006022
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,64,4,128,1,float16,fp8,0,8.779637018839518
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,64,64,128,1,float16,float16,0,4.812069257100423
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,64,8,128,1,float16,float16,0,9.37284787495931
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,64,64,128,1,float16,fp8,0,4.855994542439778
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,64,8,128,1,float16,fp8,0,8.811626434326172
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,64,1,128,1,float16,float16,0,4.2215627034505205
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,64,1,128,1,float16,fp8,0,4.331114768981934
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,64,2,128,1,float16,float16,0,4.29527473449707
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,64,2,128,1,float16,fp8,0,4.293989181518555
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,64,64,128,1,float16,float16,0,2.396986643473307
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,64,4,128,1,float16,float16,0,4.37388261159261
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,64,64,128,1,float16,fp8,0,2.466106732686361
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,64,8,128,1,float16,float16,0,4.274250666300456
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,64,4,128,1,float16,fp8,0,4.548495928446452
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,64,8,128,1,float16,fp8,0,4.4528961181640625
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,64,1,128,1,float16,float16,0,2.1998292605082193
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,64,1,128,1,float16,fp8,0,2.16701873143514
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,64,2,128,1,float16,float16,0,2.149717330932617
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,64,2,128,1,float16,fp8,0,2.2803093592325845
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,64,4,128,1,float16,float16,0,2.187621275583903
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,64,64,128,1,float16,float16,0,1.2297653357187908
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,64,4,128,1,float16,fp8,0,2.385157267252604
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,64,8,128,1,float16,float16,0,2.150458653767904
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,64,8,128,1,float16,fp8,0,2.248703956604004
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,64,1,128,1,float16,fp8,0,1.13045867284139
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,64,64,128,1,float16,fp8,0,1.2649386723836262
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,64,1,128,1,float16,float16,0,1.1358239650726318
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,64,2,128,1,float16,float16,0,1.0963892936706543
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,64,4,128,1,float16,float16,0,1.1152586936950684
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,64,2,128,1,float16,fp8,0,1.1550239721934001
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,64,4,128,1,float16,fp8,0,1.1552159786224365
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,64,8,128,1,float16,float16,0,1.0914080142974854
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,64,8,128,1,float16,fp8,0,1.1585493087768555
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,64,1,128,1,float16,fp8,0,16.04408009847005
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,64,2,128,1,float16,float16,0,17.73307164510091
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,64,4,128,1,float16,fp8,0,16.887723286946613
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,64,4,128,1,float16,float16,0,18.10731252034505
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,64,8,128,1,float16,float16,0,17.650591532389324
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,64,2,128,1,float16,fp8,0,16.29266103108724
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,64,1,128,1,float16,float16,0,17.777727762858074
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,64,64,128,1,float16,float16,0,9.852314631144205
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,64,1,128,1,float16,float16,0,7.9181334177653
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,64,64,128,1,float16,fp8,0,9.579018910725912
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,64,1,128,1,float16,fp8,0,8.090293248494467
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,64,2,128,1,float16,float16,0,8.798282623291016
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,64,2,128,1,float16,fp8,0,8.077605565388998
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,64,4,128,1,float16,float16,0,9.00927988688151
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,64,8,128,1,float16,fp8,0,16.587029774983723
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,64,1,128,1,float16,float16,0,3.9261706670125327
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,64,64,128,1,float16,fp8,0,4.880101203918457
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,64,64,128,1,float16,float16,0,4.59061336517334
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,64,1,128,1,float16,fp8,0,4.216016133626302
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,64,4,128,1,float16,fp8,0,8.506687800089518
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,64,8,128,1,float16,fp8,0,8.45630963643392
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,64,8,128,1,float16,float16,0,8.503519694010416
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,64,2,128,1,float16,float16,0,4.018639882405599
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,64,2,128,1,float16,fp8,0,4.242522557576497
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,64,4,128,1,float16,float16,0,4.063269297281901
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,64,1,128,1,float16,float16,0,1.9847572644551594
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,64,64,128,1,float16,float16,0,2.278437296549479
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,64,8,128,1,float16,float16,0,4.004474639892578
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,64,64,128,1,float16,fp8,0,2.538933277130127
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,64,8,128,1,float16,fp8,0,4.246245384216309
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,64,4,128,1,float16,fp8,0,4.327877362569173
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,64,1,128,1,float16,fp8,0,2.065173308054606
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,64,2,128,1,float16,float16,0,2.0662453969319663
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,64,2,128,1,float16,fp8,0,2.1270292599995932
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,64,4,128,1,float16,float16,0,2.0541332562764487
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,64,8,128,1,float16,float16,0,2.0121706326802573
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,64,4,128,1,float16,fp8,0,2.300058682759603
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,64,64,128,1,float16,float16,0,1.1830453077952068
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,64,1,128,1,float16,fp8,0,1.0497653484344482
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,64,8,128,1,float16,fp8,0,2.148128032684326
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,64,64,128,1,float16,fp8,0,1.2419573465983074
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,64,1,128,1,float16,float16,0,1.009109338124593
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,64,2,128,1,float16,float16,0,1.013050635655721
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,64,2,128,1,float16,fp8,0,1.0619200070699055
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,64,4,128,1,float16,float16,0,1.0562346776326497
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,64,8,128,1,float16,fp8,0,1.0998026529947917
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,64,8,128,1,float16,float16,0,1.0315893491109211
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,64,4,128,1,float16,fp8,0,1.1100107034047444
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,64,1,128,1,float16,float16,0,0.5147626797358195
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,64,64,128,1,float16,float16,0,0.6081546545028687
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,64,64,128,1,float16,fp8,0,0.654917319615682
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,64,2,128,1,float16,float16,0,0.5252319971720377
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,64,1,128,1,float16,fp8,0,0.5499573151270548
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,64,2,128,1,float16,fp8,0,0.5534720023473104
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,64,4,128,1,float16,float16,0,0.5467093388239542
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,64,4,128,1,float16,fp8,0,0.5776533285776774
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,64,8,128,1,float16,float16,0,0.527077317237854
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,64,8,128,1,float16,fp8,0,0.5806293487548828
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,64,1,128,1,float16,float16,0,10.31721051534017
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,64,1,128,1,float16,fp8,0,9.600597381591797
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,64,2,128,1,float16,float16,0,10.382991790771484
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,64,4,128,1,float16,float16,0,10.397504170735678
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,64,8,128,1,float16,float16,0,9.951728185017904
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,64,8,128,1,float16,fp8,0,10.133386611938477
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,64,2,128,1,float16,fp8,0,9.610858917236328
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,64,4,128,1,float16,fp8,0,10.262058893839518
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,64,1,128,1,float16,float16,0,4.669546763102214
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,64,1,128,1,float16,fp8,0,4.862602551778157
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,64,64,128,1,float16,float16,0,5.663141250610352
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,64,2,128,1,float16,float16,0,4.952879905700684
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,64,64,128,1,float16,fp8,0,5.967514673868815
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,64,2,128,1,float16,fp8,0,4.839695930480957
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,64,4,128,1,float16,float16,0,5.075423876444499
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,64,4,128,1,float16,fp8,0,5.110752105712891
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,64,64,128,1,float16,float16,0,2.7805919647216797
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,64,1,128,1,float16,fp8,0,2.418351968129476
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,64,1,128,1,float16,float16,0,2.315802733103434
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,64,8,128,1,float16,float16,0,4.937002817789714
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,64,64,128,1,float16,fp8,0,3.0585225423177085
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,64,2,128,1,float16,float16,0,2.3721440633138022
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,64,2,128,1,float16,fp8,0,2.4417227109273276
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,64,8,128,1,float16,fp8,0,5.0819091796875
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,64,4,128,1,float16,float16,0,2.4691947301228843
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,64,4,128,1,float16,fp8,0,2.5883359909057617
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,64,64,128,1,float16,float16,0,1.4102613131205242
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,64,8,128,1,float16,float16,0,2.4227466583251953
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,64,1,128,1,float16,float16,0,1.166383981704712
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,64,64,128,1,float16,fp8,0,1.5726025899251301
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,64,8,128,1,float16,fp8,0,2.691295941670736
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,64,1,128,1,float16,fp8,0,1.2380747000376384
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,64,2,128,1,float16,float16,0,1.1840266386667888
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,64,2,128,1,float16,fp8,0,1.2416319847106934
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,64,4,128,1,float16,float16,0,1.2358559767405193
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,64,4,128,1,float16,fp8,0,1.3577280044555664
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,64,8,128,1,float16,float16,0,1.2193066279093425
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,64,8,128,1,float16,fp8,0,1.3266773223876953
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,64,64,128,1,float16,fp8,0,0.7877866427103678
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,64,1,128,1,float16,float16,0,0.5937226613362631
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,64,64,128,1,float16,float16,0,0.7376533349355062
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,64,1,128,1,float16,fp8,0,0.6398933331171671
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,64,2,128,1,float16,float16,0,0.6113706827163696
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,64,2,128,1,float16,fp8,0,0.6442079941431681
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,64,4,128,1,float16,float16,0,0.6335200071334839
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,64,8,128,1,float16,float16,0,0.6199893156687418
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,64,4,128,1,float16,fp8,0,0.6809813181559244
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,64,8,128,1,float16,fp8,0,0.683135986328125
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,64,64,128,1,float16,float16,0,0.3798240025838216
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,64,2,128,1,float16,float16,0,0.32015466690063477
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,64,1,128,1,float16,float16,0,0.31214932600657147
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,64,64,128,1,float16,fp8,0,0.4182773431142171
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,64,1,128,1,float16,fp8,0,0.33930667241414386
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,64,2,128,1,float16,fp8,0,0.34457600116729736
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,64,4,128,1,float16,float16,0,0.33746135234832764
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,64,4,128,1,float16,fp8,0,0.3614186843236287
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,64,8,128,1,float16,float16,0,0.32683734099070233
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,64,8,128,1,float16,fp8,0,0.3586346705754598
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,64,1,128,1,float16,float16,0,9.596879959106445
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,64,4,128,1,float16,float16,0,10.191781361897787
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,64,2,128,1,float16,fp8,0,9.756752014160156
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,64,8,128,1,float16,float16,0,10.161354700724283
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,64,4,128,1,float16,fp8,0,10.355978647867838
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,64,1,128,1,float16,fp8,0,9.5098025004069
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,64,2,128,1,float16,float16,0,9.695834477742514
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,64,64,128,1,float16,float16,0,5.930325190226237
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,64,8,128,1,float16,fp8,0,9.975626627604166
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,64,1,128,1,float16,float16,0,4.58406925201416
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,64,2,128,1,float16,float16,0,4.752170562744141
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,64,1,128,1,float16,fp8,0,4.983215967814128
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,64,64,128,1,float16,fp8,0,6.440437316894531
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,64,2,128,1,float16,fp8,0,4.813088099161784
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,64,4,128,1,float16,float16,0,4.974591890970866
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,64,1,128,1,float16,float16,0,2.216965357462565
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,64,64,128,1,float16,float16,0,2.8843040466308594
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,64,8,128,1,float16,float16,0,4.78055477142334
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,64,64,128,1,float16,fp8,0,3.162464141845703
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,64,4,128,1,float16,fp8,0,5.468346913655599
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,64,1,128,1,float16,fp8,0,2.3868373235066733
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,64,8,128,1,float16,fp8,0,5.2069441477457685
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,64,2,128,1,float16,float16,0,2.3418614069620767
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,64,2,128,1,float16,fp8,0,2.5122879346211753
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,64,4,128,1,float16,float16,0,2.4340693155924478
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,64,64,128,1,float16,float16,0,1.454485257466634
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,64,64,128,1,float16,fp8,0,1.6122666994730632
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,64,1,128,1,float16,float16,0,1.1227093537648518
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,64,8,128,1,float16,float16,0,2.390687942504883
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,64,8,128,1,float16,fp8,0,2.612287998199463
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,64,4,128,1,float16,fp8,0,2.7156588236490884
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,64,1,128,1,float16,fp8,0,1.2314826647440593
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,64,4,128,1,float16,float16,0,1.2334880034128826
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,64,2,128,1,float16,fp8,0,1.2303840319315593
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,64,2,128,1,float16,float16,0,1.1674453417460124
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,64,4,128,1,float16,fp8,0,1.3418505986531575
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,64,8,128,1,float16,float16,0,1.202672004699707
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,64,64,128,1,float16,float16,0,0.7442026933034261
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,64,8,128,1,float16,fp8,0,1.3119040330251057
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,64,1,128,1,float16,fp8,0,0.6233386596043905
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,64,64,128,1,float16,fp8,0,0.8184266885121664
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,64,1,128,1,float16,float16,0,0.5836266676584879
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,64,2,128,1,float16,float16,0,0.5962826808293661
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,64,2,128,1,float16,fp8,0,0.6406720081965128
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,64,4,128,1,float16,float16,0,0.6281066735585531
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,64,8,128,1,float16,float16,0,0.6151893138885498
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,64,4,128,1,float16,fp8,0,0.6881120204925537
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,64,64,128,1,float16,float16,0,0.38441065947214764
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,64,8,128,1,float16,fp8,0,0.6702346801757812
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,64,64,128,1,float16,fp8,0,0.4292213519414266
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,64,1,128,1,float16,float16,0,0.3032960096995036
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,64,1,128,1,float16,fp8,0,0.3309280077616374
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,64,2,128,1,float16,float16,0,0.3121333320935567
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,64,2,128,1,float16,fp8,0,0.33981335163116455
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,64,4,128,1,float16,float16,0,0.3299573262532552
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,64,4,128,1,float16,fp8,0,0.36346666018168133
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,64,8,128,1,float16,float16,0,0.31919999917348224
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,64,8,128,1,float16,fp8,0,0.35782933235168457
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,64,64,128,1,float16,float16,0,0.21056000391642252
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,64,64,128,1,float16,fp8,0,0.23176000515619913
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,64,1,128,1,float16,float16,0,0.1634880006313324
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,64,1,128,1,float16,fp8,0,0.1811413367589315
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,64,2,128,1,float16,float16,0,0.16979199647903442
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,64,2,128,1,float16,fp8,0,0.18639999628067017
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,64,4,128,1,float16,float16,0,0.1765600045522054
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,64,4,128,1,float16,fp8,0,0.1990506649017334
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,64,8,128,1,float16,float16,0,0.17606399456659952
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,64,8,128,1,float16,fp8,0,0.19427200158437094
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,64,1,128,1,float16,float16,0,5.470794677734375
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,64,1,128,1,float16,fp8,0,5.897200266520183
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,64,4,128,1,float16,float16,0,6.1975148518880205
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,64,8,128,1,float16,float16,0,5.911429087320964
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,64,4,128,1,float16,fp8,0,6.527941385904948
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,64,8,128,1,float16,fp8,0,6.367994944254558
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,64,2,128,1,float16,float16,0,6.021701176961263
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,64,64,128,1,float16,float16,0,3.691653251647949
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,64,1,128,1,float16,float16,0,2.7339680989583335
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,64,2,128,1,float16,float16,0,2.786778767903646
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,64,64,128,1,float16,fp8,0,4.120501200358073
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,64,1,128,1,float16,fp8,0,3.049935976664225
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,64,2,128,1,float16,fp8,0,3.125018755594889
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,64,4,128,1,float16,float16,0,3.0262133280436196
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,64,2,128,1,float16,fp8,0,6.061754862467448
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,64,4,128,1,float16,fp8,0,3.3859945933024087
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,64,8,128,1,float16,float16,0,2.9232266743977866
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,64,64,128,1,float16,float16,0,1.846992015838623
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,64,8,128,1,float16,fp8,0,3.207285245259603
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,64,64,128,1,float16,fp8,0,2.087087949117025
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,64,2,128,1,float16,float16,0,1.4305973052978516
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,64,1,128,1,float16,float16,0,1.3833707173665364
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,64,2,128,1,float16,fp8,0,1.5323200225830078
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,64,4,128,1,float16,float16,0,1.5399519602457683
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,64,1,128,1,float16,fp8,0,1.4942827224731445
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,64,4,128,1,float16,fp8,0,1.674623966217041
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,64,8,128,1,float16,float16,0,1.4785332679748535
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,64,64,128,1,float16,float16,0,0.9464266300201416
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,64,8,128,1,float16,fp8,0,1.633237361907959
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,64,64,128,1,float16,fp8,0,1.0528960227966309
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,64,2,128,1,float16,float16,0,0.7246080239613851
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,64,1,128,1,float16,float16,0,0.7040959993998209
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,64,1,128,1,float16,fp8,0,0.7625760237375895
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,64,2,128,1,float16,fp8,0,0.787722667058309
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,64,4,128,1,float16,fp8,0,0.8527466456095377
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,64,4,128,1,float16,float16,0,0.7773013114929199
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,64,8,128,1,float16,float16,0,0.7475787003835043
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,64,8,128,1,float16,fp8,0,0.8365866343180338
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,64,1,128,1,float16,float16,0,0.36324799060821533
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,64,64,128,1,float16,float16,0,0.48473600546518963
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,64,64,128,1,float16,fp8,0,0.5429813464482626
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,64,1,128,1,float16,fp8,0,0.40061334768931073
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,64,2,128,1,float16,fp8,0,0.4071040153503418
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,64,2,128,1,float16,float16,0,0.37805867195129395
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,64,4,128,1,float16,fp8,0,0.4409866730372111
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,64,8,128,1,float16,float16,0,0.395146648089091
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,64,4,128,1,float16,float16,0,0.4020800193150838
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,64,8,128,1,float16,fp8,0,0.44142401218414307
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,64,1,128,1,float16,float16,0,0.19262933731079102
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,64,64,128,1,float16,fp8,0,0.2914773424466451
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,64,64,128,1,float16,float16,0,0.25574400027592975
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,64,1,128,1,float16,fp8,0,0.21590399742126465
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,64,2,128,1,float16,float16,0,0.20105600357055664
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,64,2,128,1,float16,fp8,0,0.22009066740671793
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,64,8,128,1,float16,float16,0,0.2093706727027893
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,64,4,128,1,float16,float16,0,0.21485867102940878
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,64,4,128,1,float16,fp8,0,0.23690666755040488
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,64,8,128,1,float16,fp8,0,0.23429334163665771
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,64,64,128,1,float16,float16,0,0.14229333400726318
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,64,2,128,1,float16,fp8,0,0.12191466490427653
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,64,64,128,1,float16,fp8,0,0.16242667039235434
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,64,4,128,1,float16,float16,0,0.11974933743476868
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,64,1,128,1,float16,float16,0,0.10621866583824158
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,64,1,128,1,float16,fp8,0,0.11732799808184306
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,64,2,128,1,float16,float16,0,0.1104853351910909
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,64,4,128,1,float16,fp8,0,0.12864533066749573
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,64,8,128,1,float16,float16,0,0.11364799737930298
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,64,8,128,1,float16,fp8,0,0.125408003727595
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,64,1,128,1,float16,float16,0,5.62388292948405
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,64,2,128,1,float16,float16,0,6.067504247029622
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,64,8,128,1,float16,float16,0,6.12825075785319
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,64,1,128,1,float16,fp8,0,6.305525461832683
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,64,2,128,1,float16,fp8,0,6.431013107299805
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,64,4,128,1,float16,float16,0,6.472768147786458
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,64,8,128,1,float16,fp8,0,6.8386077880859375
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,64,4,128,1,float16,fp8,0,7.095034917195638
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,64,1,128,1,float16,float16,0,2.831637382507324
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,64,64,128,1,float16,float16,0,4.06492805480957
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,64,1,128,1,float16,fp8,0,3.1695734659830728
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,64,64,128,1,float16,fp8,0,4.815930684407552
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,64,64,128,1,float16,float16,0,2.0360320409139
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,64,8,128,1,float16,float16,0,3.0450881322224936
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,64,2,128,1,float16,float16,0,2.912405331929525
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,64,8,128,1,float16,fp8,0,3.543034553527832
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,64,64,128,1,float16,fp8,0,2.4162346522013345
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,64,4,128,1,float16,fp8,0,3.5611359278361
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,64,4,128,1,float16,float16,0,3.222794532775879
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,64,2,128,1,float16,fp8,0,3.285973230997721
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,64,1,128,1,float16,float16,0,1.430949370066325
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,64,1,128,1,float16,fp8,0,1.5983467102050781
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,64,2,128,1,float16,float16,0,1.5095465977986653
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,64,4,128,1,float16,fp8,0,1.8040000597635906
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,64,4,128,1,float16,float16,0,1.6346933046976726
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,64,64,128,1,float16,float16,0,1.0366986592610676
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,64,2,128,1,float16,fp8,0,1.6587200164794922
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,64,64,128,1,float16,fp8,0,1.2169973055521648
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,64,8,128,1,float16,fp8,0,1.7670079867045085
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,64,1,128,1,float16,float16,0,0.7307039896647135
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,64,8,128,1,float16,float16,0,1.5467573801676433
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,64,1,128,1,float16,fp8,0,0.8073013623555502
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,64,2,128,1,float16,float16,0,0.7650293509165446
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,64,4,128,1,float16,float16,0,0.8352959950764974
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,64,4,128,1,float16,fp8,0,0.9363946914672852
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,64,8,128,1,float16,float16,0,0.796176036198934
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,64,2,128,1,float16,fp8,0,0.8386399745941162
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,64,8,128,1,float16,fp8,0,0.8977173169453939
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,64,64,128,1,float16,fp8,0,0.6195253531138102
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,64,1,128,1,float16,float16,0,0.3785173495610555
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,64,64,128,1,float16,float16,0,0.5293386777242025
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,64,1,128,1,float16,fp8,0,0.4216639995574951
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,64,2,128,1,float16,float16,0,0.3967680136362712
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,64,2,128,1,float16,fp8,0,0.43665067354838055
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,64,4,128,1,float16,float16,0,0.42925333976745605
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,64,8,128,1,float16,float16,0,0.41259201367696124
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,64,8,128,1,float16,fp8,0,0.46644266446431476
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,64,4,128,1,float16,fp8,0,0.4752533435821533
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,64,64,128,1,float16,fp8,0,0.32917332649230957
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,64,1,128,1,float16,fp8,0,0.22445867458979288
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,64,64,128,1,float16,float16,0,0.2788426677385966
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,64,1,128,1,float16,float16,0,0.20119466384251913
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,64,2,128,1,float16,float16,0,0.2088373303413391
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,64,4,128,1,float16,float16,0,0.2242400050163269
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,64,2,128,1,float16,fp8,0,0.23154133558273315
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,64,4,128,1,float16,fp8,0,0.25481067101160687
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,64,8,128,1,float16,float16,0,0.21839465697606406
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,64,8,128,1,float16,fp8,0,0.24822932481765747
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,64,64,128,1,float16,float16,0,0.15235732992490134
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,64,64,128,1,float16,fp8,0,0.17947733402252197
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,64,1,128,1,float16,float16,0,0.10802132884661357
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,64,1,128,1,float16,fp8,0,0.12636799613634744
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,64,2,128,1,float16,float16,0,0.11339199542999268
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,64,2,128,1,float16,fp8,0,0.12970667084058127
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,64,4,128,1,float16,float16,0,0.12409599622090657
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,64,8,128,1,float16,float16,0,0.11778666575749715
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,64,4,128,1,float16,fp8,0,0.13959466417630514
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,64,8,128,1,float16,fp8,0,0.1357973317305247
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,64,64,128,1,float16,float16,0,0.08663466572761536
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,64,64,128,1,float16,fp8,0,0.10133866469065349
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,64,1,128,1,float16,float16,0,0.058506667613983154
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,64,1,128,1,float16,fp8,0,0.06670933465162913
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,64,2,128,1,float16,float16,0,0.06238933404286703
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,64,2,128,1,float16,fp8,0,0.07187733550866444
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,64,4,128,1,float16,float16,0,0.06739200154940288
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,64,4,128,1,float16,fp8,0,0.07606400052706401
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,64,8,128,1,float16,float16,0,0.06424533327420552
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,64,8,128,1,float16,fp8,0,0.07526400188604991
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,64,1,128,1,float16,fp8,0,4.724752108256022
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,64,2,128,1,float16,float16,0,4.284122784932454
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,64,2,128,1,float16,fp8,0,4.908224105834961
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,64,64,128,1,float16,float16,0,3.2638400395711265
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,64,4,128,1,float16,float16,0,4.866640090942383
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,64,4,128,1,float16,fp8,0,5.6724904378255205
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,64,1,128,1,float16,float16,0,2.092485268910726
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,64,1,128,1,float16,float16,0,4.144357363382976
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,64,1,128,1,float16,fp8,0,2.3764799435933432
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,64,8,128,1,float16,float16,0,4.632906595865886
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,64,8,128,1,float16,fp8,0,5.263594627380371
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,64,64,128,1,float16,fp8,0,4.170480092366536
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,64,2,128,1,float16,float16,0,2.1820267041524253
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,64,4,128,1,float16,float16,0,2.477306683858236
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,64,2,128,1,float16,fp8,0,2.4874986012776694
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,64,4,128,1,float16,fp8,0,2.8515412012736
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,64,64,128,1,float16,fp8,0,2.099760055541992
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,64,1,128,1,float16,float16,0,1.0469760100046794
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,64,2,128,1,float16,float16,0,1.112720012664795
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,64,1,128,1,float16,fp8,0,1.2129120031992595
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,64,8,128,1,float16,fp8,0,2.7319520314534507
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,64,64,128,1,float16,float16,0,1.6481653849283855
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,64,2,128,1,float16,fp8,0,1.247594674428304
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,64,4,128,1,float16,fp8,0,1.4260907173156738
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,64,4,128,1,float16,float16,0,1.2428692976633708
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,64,8,128,1,float16,float16,0,1.1815679868062336
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,64,8,128,1,float16,fp8,0,1.3727253278096516
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,64,64,128,1,float16,float16,0,0.833189328511556
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,64,64,128,1,float16,fp8,0,1.0732426643371582
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,64,1,128,1,float16,float16,0,0.5393120050430298
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,64,1,128,1,float16,fp8,0,0.6101760069529215
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,64,2,128,1,float16,float16,0,0.5704266627629598
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,64,8,128,1,float16,float16,0,2.300106684366862
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,64,2,128,1,float16,fp8,0,0.6402080059051514
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,64,4,128,1,float16,float16,0,0.63318932056427
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,64,4,128,1,float16,fp8,0,0.7394560178120931
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,64,64,128,1,float16,float16,0,0.4272746642430623
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,64,8,128,1,float16,fp8,0,0.7009920279184977
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,64,64,128,1,float16,fp8,0,0.5452746550242106
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,64,1,128,1,float16,fp8,0,0.32306132713953656
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,64,1,128,1,float16,float16,0,0.2801706592241923
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,64,2,128,1,float16,float16,0,0.297706663608551
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,64,2,128,1,float16,fp8,0,0.3309599955876668
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,64,8,128,1,float16,float16,0,0.5977919896443685
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,64,4,128,1,float16,float16,0,0.32465600967407227
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,64,4,128,1,float16,fp8,0,0.3814133405685425
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,64,8,128,1,float16,fp8,0,0.368069330851237
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,64,8,128,1,float16,float16,0,0.31191466252009076
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,64,64,128,1,float16,float16,0,0.2262880007425944
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,64,64,128,1,float16,fp8,0,0.2862933278083801
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,64,1,128,1,float16,float16,0,0.15095466375350952
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,64,1,128,1,float16,fp8,0,0.17255999644597372
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,64,2,128,1,float16,float16,0,0.15776532888412476
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,64,2,128,1,float16,fp8,0,0.1785866618156433
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,64,4,128,1,float16,float16,0,0.17402132352193198
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,64,4,128,1,float16,fp8,0,0.20061866442362467
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,64,8,128,1,float16,float16,0,0.1661066710948944
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,64,8,128,1,float16,fp8,0,0.19273600975672403
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,64,64,128,1,float16,float16,0,0.12402133146921794
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,64,64,128,1,float16,fp8,0,0.15597333510716757
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,64,1,128,1,float16,float16,0,0.08318933347860973
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,64,1,128,1,float16,fp8,0,0.09877866506576538
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,64,2,128,1,float16,float16,0,0.08749866485595703
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,64,2,128,1,float16,fp8,0,0.10187199711799622
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,64,4,128,1,float16,float16,0,0.09500267108281453
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,64,4,128,1,float16,fp8,0,0.11209066708882649
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,64,8,128,1,float16,float16,0,0.09101866682370503
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,64,8,128,1,float16,fp8,0,0.10636799534161885
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,64,64,128,1,float16,float16,0,0.0706826647122701
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,64,64,128,1,float16,fp8,0,0.0897866686185201
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,64,1,128,1,float16,float16,0,0.04526400069395701
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,64,1,128,1,float16,fp8,0,0.05417599777380625
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,64,2,128,1,float16,float16,0,0.04797333478927612
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,64,2,128,1,float16,fp8,0,0.05539733171463013
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,64,4,128,1,float16,float16,0,0.05276800195376078
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,64,4,128,1,float16,fp8,0,0.05893866717815399
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,64,8,128,1,float16,float16,0,0.05064000189304352
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,64,8,128,1,float16,fp8,0,0.0572266678015391
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,64,64,128,1,float16,float16,0,0.0413973331451416
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,64,64,128,1,float16,fp8,0,0.04798933366934458
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,64,1,128,1,float16,float16,0,0.027610667049884796
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,64,1,128,1,float16,fp8,0,0.0329120010137558
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,64,2,128,1,float16,float16,0,0.02992533395687739
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,64,2,128,1,float16,fp8,0,0.0347626656293869
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,64,4,128,1,float16,float16,0,0.03179733455181122
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,64,8,128,1,float16,float16,0,0.03102933367093404
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,64,4,128,1,float16,fp8,0,0.03668266783157984
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,64,8,128,1,float16,fp8,0,0.03589866558710734
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,64,1,128,1,float16,float16,0,1.7428746223449707
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,64,1,128,1,float16,fp8,0,1.9970720609029133
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,64,2,128,1,float16,fp8,0,2.04367462793986
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,64,4,128,1,float16,float16,0,2.09552001953125
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,64,64,128,1,float16,float16,0,1.4651145935058594
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,64,8,128,1,float16,float16,0,1.9666186968485515
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,64,1,128,1,float16,float16,0,0.8850239912668864
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,64,8,128,1,float16,fp8,0,2.327178637186686
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,64,64,128,1,float16,fp8,0,1.788922627766927
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,64,2,128,1,float16,float16,0,1.821381409962972
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,64,4,128,1,float16,fp8,0,2.453850587209066
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,64,1,128,1,float16,fp8,0,1.0124266942342122
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,64,2,128,1,float16,fp8,0,1.0436480045318604
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,64,2,128,1,float16,float16,0,0.9372159639994303
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,64,4,128,1,float16,float16,0,1.055077314376831
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,64,1,128,1,float16,float16,0,0.4520639975865682
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,64,64,128,1,float16,float16,0,0.7432906627655029
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,64,64,128,1,float16,fp8,0,0.9149440129597982
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,64,8,128,1,float16,float16,0,0.9882613023122152
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,64,8,128,1,float16,fp8,0,1.2031413714090984
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,64,1,128,1,float16,fp8,0,0.5181653499603271
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,64,2,128,1,float16,float16,0,0.4757759968439738
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,64,2,128,1,float16,fp8,0,0.5349066654841105
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,64,4,128,1,float16,float16,0,0.5441919962565104
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,64,8,128,1,float16,float16,0,0.5043786764144897
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,64,64,128,1,float16,float16,0,0.38355199495951336
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,64,4,128,1,float16,fp8,0,1.2466293176015217
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,64,64,128,1,float16,fp8,0,0.4666186571121216
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,64,8,128,1,float16,fp8,0,0.6020799875259399
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,64,1,128,1,float16,float16,0,0.23679999510447183
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,64,4,128,1,float16,fp8,0,0.6410346825917562
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,64,1,128,1,float16,fp8,0,0.2722453276316325
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,64,2,128,1,float16,float16,0,0.2477653423945109
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,64,2,128,1,float16,fp8,0,0.28095465898513794
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,64,4,128,1,float16,fp8,0,0.3307039936383565
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,64,4,128,1,float16,float16,0,0.2794133424758911
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,64,8,128,1,float16,float16,0,0.26316267251968384
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,64,8,128,1,float16,fp8,0,0.3199146588643392
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,64,64,128,1,float16,float16,0,0.20463999112447104
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,64,64,128,1,float16,fp8,0,0.2469493349393209
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,64,1,128,1,float16,float16,0,0.12660800417264303
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,64,1,128,1,float16,fp8,0,0.14621866742769876
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,64,2,128,1,float16,float16,0,0.13271466890970865
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,64,2,128,1,float16,fp8,0,0.15220800042152405
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,64,4,128,1,float16,float16,0,0.149018665154775
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,64,4,128,1,float16,fp8,0,0.17449599504470825
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,64,8,128,1,float16,float16,0,0.14064000050226846
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,64,64,128,1,float16,float16,0,0.11169600486755371
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,64,64,128,1,float16,fp8,0,0.135754664738973
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,64,1,128,1,float16,float16,0,0.07144000132878621
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,64,8,128,1,float16,fp8,0,0.1679626703262329
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,64,1,128,1,float16,fp8,0,0.08553066849708557
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,64,2,128,1,float16,float16,0,0.07406933108965556
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,64,2,128,1,float16,fp8,0,0.08742933471997578
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,64,4,128,1,float16,float16,0,0.08207466701666515
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,64,4,128,1,float16,fp8,0,0.09796800216039021
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,64,8,128,1,float16,float16,0,0.07745066781838734
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,64,8,128,1,float16,fp8,0,0.09397866328557332
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,64,64,128,1,float16,fp8,0,0.07815999786059062
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,64,64,128,1,float16,float16,0,0.0637066662311554
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,64,1,128,1,float16,float16,0,0.03888533264398575
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,64,1,128,1,float16,fp8,0,0.046341334780057274
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,64,2,128,1,float16,float16,0,0.04079466561476389
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,64,2,128,1,float16,fp8,0,0.04806933303674062
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,64,4,128,1,float16,float16,0,0.04282666742801666
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,64,4,128,1,float16,fp8,0,0.05074666440486908
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,64,8,128,1,float16,float16,0,0.0410453329483668
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,64,8,128,1,float16,fp8,0,0.048581331968307495
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,64,64,128,1,float16,fp8,0,0.043280000487963356
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,64,64,128,1,float16,float16,0,0.03712533414363861
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,64,1,128,1,float16,float16,0,0.024271999796231587
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,64,4,128,1,float16,fp8,0,0.030602666238943737
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,64,1,128,1,float16,fp8,0,0.028954667349656422
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,64,2,128,1,float16,float16,0,0.02473066747188568
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,64,2,128,1,float16,fp8,0,0.029338667790095013
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,64,4,128,1,float16,float16,0,0.025722667574882507
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,64,64,128,1,float16,fp8,0,0.025173333783944447
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,64,8,128,1,float16,float16,0,0.02550400048494339
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,64,8,128,1,float16,fp8,0,0.030479999879995983
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,64,64,128,1,float16,float16,0,0.01951466624935468
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,64,1,128,1,float16,float16,0,0.01700266698996226
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,64,1,128,1,float16,fp8,0,0.020143999407688778
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,64,2,128,1,float16,float16,0,0.017338667064905167
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,64,4,128,1,float16,float16,0,0.017317333569129307
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,64,2,128,1,float16,fp8,0,0.020661332954963047
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,64,4,128,1,float16,fp8,0,0.020367999871571858
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,64,8,128,1,float16,float16,0,0.01752000053723653
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,64,8,128,1,float16,fp8,0,0.019930666933457058
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,64,1,128,1,float16,float16,0,0.8024586836496989
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,64,1,128,1,float16,fp8,0,0.8758347034454346
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,64,2,128,1,float16,float16,0,0.8627093633015951
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,64,4,128,1,float16,float16,0,0.9884853363037109
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,64,4,128,1,float16,fp8,0,1.109376033147176
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,64,8,128,1,float16,float16,0,0.8969653447469076
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,64,64,128,1,float16,float16,0,0.7019039789835612
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,64,64,128,1,float16,fp8,0,0.7806133429209391
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,64,2,128,1,float16,fp8,0,0.9114773273468018
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,64,8,128,1,float16,fp8,0,0.9958240191141764
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,64,1,128,1,float16,float16,0,0.41279999415079754
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,64,1,128,1,float16,fp8,0,0.45849064985911053
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,64,4,128,1,float16,fp8,0,0.5735146601994833
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,64,2,128,1,float16,fp8,0,0.4806506633758545
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,64,2,128,1,float16,float16,0,0.4405759970347087
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,64,64,128,1,float16,float16,0,0.36099199453989667
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,64,4,128,1,float16,float16,0,0.506991982460022
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,64,8,128,1,float16,float16,0,0.45856531461079914
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,64,8,128,1,float16,fp8,0,0.5163786808649699
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,64,64,128,1,float16,fp8,0,0.4026399850845337
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,64,1,128,1,float16,float16,0,0.21713600556055704
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,64,1,128,1,float16,fp8,0,0.24169067541758218
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,64,2,128,1,float16,fp8,0,0.2551093300183614
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,64,2,128,1,float16,float16,0,0.23387734095255533
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,64,64,128,1,float16,float16,0,0.19275200366973877
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,64,4,128,1,float16,fp8,0,0.297487994035085
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,64,64,128,1,float16,fp8,0,0.21658132473627725
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,64,1,128,1,float16,float16,0,0.11750400066375732
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,64,8,128,1,float16,float16,0,0.24102399746576944
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,64,4,128,1,float16,float16,0,0.26414400339126587
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,64,1,128,1,float16,fp8,0,0.13496533036231995
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,64,2,128,1,float16,fp8,0,0.139984001715978
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,64,2,128,1,float16,float16,0,0.1251200040181478
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,64,4,128,1,float16,float16,0,0.14094932874043783
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,64,4,128,1,float16,fp8,0,0.16268799702326456
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,64,64,128,1,float16,float16,0,0.10803733269373576
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,64,8,128,1,float16,float16,0,0.12897066275278726
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,64,8,128,1,float16,fp8,0,0.2757226626078288
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,64,8,128,1,float16,fp8,0,0.1483573317527771
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,64,64,128,1,float16,fp8,0,0.11643733580907185
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,64,1,128,1,float16,float16,0,0.0662720004717509
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,64,1,128,1,float16,fp8,0,0.07552533348401387
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,64,2,128,1,float16,float16,0,0.07127466797828674
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,64,2,128,1,float16,fp8,0,0.07946133116881053
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,64,4,128,1,float16,float16,0,0.07826666533946991
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,64,4,128,1,float16,fp8,0,0.08983467022577922
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,64,8,128,1,float16,float16,0,0.07016000151634216
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,64,8,128,1,float16,fp8,0,0.0828906645377477
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,64,64,128,1,float16,float16,0,0.06061333417892456
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,64,64,128,1,float16,fp8,0,0.06557333469390869
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,64,4,128,1,float16,float16,0,0.03860266755024592
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,64,1,128,1,float16,float16,0,0.03294399877389272
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,64,1,128,1,float16,fp8,0,0.03822399924198786
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,64,2,128,1,float16,float16,0,0.03634133438269297
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,64,64,128,1,float16,float16,0,0.0335413341720899
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,64,2,128,1,float16,fp8,0,0.04018666595220566
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,64,4,128,1,float16,fp8,0,0.044351999958356224
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,64,8,128,1,float16,float16,0,0.03675200045108795
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,64,8,128,1,float16,fp8,0,0.041589332123597465
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,64,64,128,1,float16,fp8,0,0.0331839993596077
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,64,1,128,1,float16,float16,0,0.021242665747801464
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,64,1,128,1,float16,fp8,0,0.02475733309984207
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,64,2,128,1,float16,float16,0,0.021226666867733
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,64,4,128,1,float16,float16,0,0.022682666778564453
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,64,2,128,1,float16,fp8,0,0.024703999360402424
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,64,4,128,1,float16,fp8,0,0.027098665634791057
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,64,8,128,1,float16,float16,0,0.022645334402720135
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,64,8,128,1,float16,fp8,0,0.02590399980545044
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,64,64,128,1,float16,float16,0,0.017893332988023758
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,64,64,128,1,float16,fp8,0,0.020821332931518555
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,64,1,128,1,float16,float16,0,0.015466666469971338
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,64,1,128,1,float16,fp8,0,0.01836266616980235
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,64,2,128,1,float16,float16,0,0.015520000209410986
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,64,4,128,1,float16,float16,0,0.015781333049138386
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,64,2,128,1,float16,fp8,0,0.01809599995613098
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,64,4,128,1,float16,fp8,0,0.0183146670460701
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,64,8,128,1,float16,float16,0,0.01597333326935768
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,64,8,128,1,float16,fp8,0,0.01812800019979477
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,64,64,128,1,float16,float16,0,0.016496000190575916
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,64,64,128,1,float16,fp8,0,0.019023999571800232
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,64,1,128,1,float16,float16,0,0.014837333311637243
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,64,1,128,1,float16,fp8,0,0.0174346665541331
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,64,2,128,1,float16,float16,0,0.014959999670584997
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,64,8,128,1,float16,float16,0,0.014922666052977243
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,64,2,128,1,float16,fp8,0,0.017711999515692394
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,64,4,128,1,float16,fp8,0,0.017957333475351334
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,64,8,128,1,float16,fp8,0,0.017727999637524288
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,64,4,128,1,float16,float16,0,0.015050667027632395
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,64,1,128,1,float16,float16,0,0.4078986644744873
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,64,1,128,1,float16,fp8,0,0.4590719938278198
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,64,2,128,1,float16,fp8,0,0.47808531920115155
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,64,4,128,1,float16,float16,0,0.49856531620025635
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,64,64,128,1,float16,float16,0,0.41887466112772626
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,64,4,128,1,float16,fp8,0,0.5626560052235922
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,64,8,128,1,float16,fp8,0,0.5130186478296915
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,64,2,128,1,float16,float16,0,0.44045865535736084
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,64,64,128,1,float16,fp8,0,0.40646934509277344
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,64,2,128,1,float16,float16,0,0.22992000977198282
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,64,4,128,1,float16,float16,0,0.26262933015823364
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,64,1,128,1,float16,float16,0,0.2132586638132731
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,64,8,128,1,float16,float16,0,0.4491093158721924
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,64,2,128,1,float16,fp8,0,0.2560533285140991
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,64,1,128,1,float16,fp8,0,0.24266133705774942
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,64,4,128,1,float16,fp8,0,0.3006933331489563
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,64,8,128,1,float16,float16,0,0.23428799708684286
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,64,8,128,1,float16,fp8,0,0.27081600824991864
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,64,64,128,1,float16,float16,0,0.21922133366266885
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,64,64,128,1,float16,fp8,0,0.2125706672668457
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,64,1,128,1,float16,float16,0,0.11618666847546895
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,64,1,128,1,float16,fp8,0,0.1345919966697693
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,64,2,128,1,float16,fp8,0,0.14017599821090698
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,64,4,128,1,float16,float16,0,0.14036800463994345
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,64,4,128,1,float16,fp8,0,0.16394133369127908
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,64,64,128,1,float16,float16,0,0.11700800061225891
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,64,8,128,1,float16,fp8,0,0.1478613317012787
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,64,8,128,1,float16,float16,0,0.12736533085505167
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,64,1,128,1,float16,float16,0,0.06656000018119812
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,64,1,128,1,float16,fp8,0,0.07574399809042613
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,64,2,128,1,float16,float16,0,0.12363200386365254
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,64,64,128,1,float16,fp8,0,0.11552533507347107
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,64,2,128,1,float16,float16,0,0.0690719981988271
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,64,4,128,1,float16,float16,0,0.07799999912579854
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,64,2,128,1,float16,fp8,0,0.07969599962234497
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,64,4,128,1,float16,fp8,0,0.0906986693541209
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,64,8,128,1,float16,float16,0,0.06968533496061961
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,64,8,128,1,float16,fp8,0,0.08305599788824718
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,64,64,128,1,float16,float16,0,0.066170667608579
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,64,64,128,1,float16,fp8,0,0.06353599826494853
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,64,1,128,1,float16,float16,0,0.03288000077009201
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,64,1,128,1,float16,fp8,0,0.0377813329299291
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,64,2,128,1,float16,float16,0,0.03484266748030981
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,64,2,128,1,float16,fp8,0,0.039647998909155525
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,64,4,128,1,float16,float16,0,0.03814399987459183
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,64,4,128,1,float16,fp8,0,0.04369066655635834
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,64,8,128,1,float16,float16,0,0.03626133253177007
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,64,8,128,1,float16,fp8,0,0.04152533411979675
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,64,64,128,1,float16,float16,0,0.03703466554482778
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,64,64,128,1,float16,fp8,0,0.03254399945338567
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,64,1,128,1,float16,float16,0,0.020960000654061634
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,64,1,128,1,float16,fp8,0,0.02481066683928172
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,64,2,128,1,float16,fp8,0,0.02481599897146225
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,64,4,128,1,float16,fp8,0,0.026149332523345947
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,64,2,128,1,float16,float16,0,0.02125866711139679
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,64,4,128,1,float16,float16,0,0.02254933367172877
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,64,8,128,1,float16,fp8,0,0.025941332181294758
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,64,8,128,1,float16,float16,0,0.02275199939807256
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,64,64,128,1,float16,float16,0,0.01915733392039935
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,64,64,128,1,float16,fp8,0,0.02013333390156428
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,64,1,128,1,float16,float16,0,0.015504000087579092
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,64,1,128,1,float16,fp8,0,0.01800000046690305
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,64,2,128,1,float16,float16,0,0.01545599972208341
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,64,2,128,1,float16,fp8,0,0.018063999712467194
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,64,4,128,1,float16,float16,0,0.01568000018596649
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,64,4,128,1,float16,fp8,0,0.018229333062966663
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,64,8,128,1,float16,float16,0,0.015781333049138386
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,64,8,128,1,float16,fp8,0,0.018031999468803406
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,64,64,128,1,float16,float16,0,0.014645333091417948
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,64,64,128,1,float16,fp8,0,0.015487999965747198
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,64,1,128,1,float16,float16,0,0.015146666516860327
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,64,1,128,1,float16,fp8,0,0.017509333789348602
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,64,2,128,1,float16,float16,0,0.015402667224407196
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,64,2,128,1,float16,fp8,0,0.01800000046690305
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,64,4,128,1,float16,float16,0,0.015263999501864115
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,64,4,128,1,float16,fp8,0,0.017850667238235474
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,64,8,128,1,float16,float16,0,0.015157333264748255
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,64,8,128,1,float16,fp8,0,0.01739199956258138
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,64,64,128,1,float16,float16,0,0.013568000247081121
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,64,64,128,1,float16,fp8,0,0.014949332922697067
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,64,4,128,1,float16,float16,0,0.015200000256299973
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,64,1,128,1,float16,float16,0,0.014730667074521383
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,64,1,128,1,float16,fp8,0,0.017210666090250015
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,64,2,128,1,float16,float16,0,0.014639999717473984
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,64,2,128,1,float16,fp8,0,0.01758933315674464
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,64,4,128,1,float16,fp8,0,0.017045332739750545
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,64,8,128,1,float16,float16,0,0.014746667196353277
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,64,8,128,1,float16,fp8,0,0.01732800031701724
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,64,1,128,1,float16,float16,0,0.21251734097798666
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,64,1,128,1,float16,fp8,0,0.23974400758743286
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,64,2,128,1,float16,float16,0,0.22800000508626303
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,64,4,128,1,float16,float16,0,0.25979199012120563
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,64,4,128,1,float16,fp8,0,0.29958399136861164
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,64,8,128,1,float16,float16,0,0.23593600591023764
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,64,64,128,1,float16,float16,0,0.32096532980600995
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,64,8,128,1,float16,fp8,0,0.2709333300590515
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,64,64,128,1,float16,fp8,0,0.3062933286031087
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,64,1,128,1,float16,float16,0,0.11478933691978455
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,64,1,128,1,float16,fp8,0,0.13425599535306296
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,64,2,128,1,float16,float16,0,0.12222400307655334
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,64,2,128,1,float16,fp8,0,0.1409119963645935
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,64,2,128,1,float16,fp8,0,0.2539946635564168
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,64,8,128,1,float16,float16,0,0.12654399871826172
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,64,4,128,1,float16,fp8,0,0.1625493367513021
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,64,8,128,1,float16,fp8,0,0.14756266276041666
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,64,4,128,1,float16,float16,0,0.13900799552599588
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,64,64,128,1,float16,float16,0,0.16734933853149414
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,64,64,128,1,float16,fp8,0,0.16299200057983398
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,64,1,128,1,float16,float16,0,0.06656000018119812
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,64,1,128,1,float16,fp8,0,0.0758240024248759
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,64,2,128,1,float16,float16,0,0.0705386648575465
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,64,2,128,1,float16,fp8,0,0.07946133116881053
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,64,4,128,1,float16,float16,0,0.0788213312625885
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,64,8,128,1,float16,float16,0,0.06974400083223979
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,64,8,128,1,float16,fp8,0,0.08183999856313069
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,64,4,128,1,float16,fp8,0,0.08879466851552327
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,64,64,128,1,float16,float16,0,0.08987733721733093
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,64,64,128,1,float16,fp8,0,0.08925333619117737
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,64,1,128,1,float16,float16,0,0.03277866790692011
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,64,1,128,1,float16,fp8,0,0.038165333370367684
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,64,2,128,1,float16,float16,0,0.03440000116825104
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,64,4,128,1,float16,float16,0,0.03930133332808813
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,64,2,128,1,float16,fp8,0,0.039018665750821434
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,64,4,128,1,float16,fp8,0,0.04340266684691111
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,64,8,128,1,float16,float16,0,0.036133334040641785
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,64,8,128,1,float16,fp8,0,0.04212800165017446
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,64,64,128,1,float16,fp8,0,0.04562666515509287
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,64,64,128,1,float16,float16,0,0.0498933345079422
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,64,1,128,1,float16,float16,0,0.020938667158285778
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,64,1,128,1,float16,fp8,0,0.024703999360402424
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,64,2,128,1,float16,float16,0,0.02120000123977661
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,64,2,128,1,float16,fp8,0,0.02478933334350586
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,64,4,128,1,float16,float16,0,0.022624000906944275
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,64,4,128,1,float16,fp8,0,0.026341333985328674
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,64,8,128,1,float16,float16,0,0.022522665560245514
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,64,8,128,1,float16,fp8,0,0.025941332181294758
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,64,64,128,1,float16,float16,0,0.025498665869235992
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,64,64,128,1,float16,fp8,0,0.026517334083716076
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,64,1,128,1,float16,float16,0,0.015370666980743408
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,64,2,128,1,float16,float16,0,0.015418666104475657
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,64,1,128,1,float16,fp8,0,0.01803733284274737
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,64,2,128,1,float16,fp8,0,0.018245333184798557
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,64,4,128,1,float16,float16,0,0.015706667055686314
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,64,4,128,1,float16,fp8,0,0.017978666971127193
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,64,8,128,1,float16,float16,0,0.01581866666674614
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,64,8,128,1,float16,fp8,0,0.017946666727463405
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,64,64,128,1,float16,float16,0,0.017968000223239262
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,64,64,128,1,float16,fp8,0,0.01878400022784869
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,64,1,128,1,float16,float16,0,0.01470400020480156
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,64,1,128,1,float16,fp8,0,0.01754133279124896
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,64,2,128,1,float16,float16,0,0.015130666395028433
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,64,2,128,1,float16,fp8,0,0.017616000026464462
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,64,4,128,1,float16,float16,0,0.015189333508412043
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,64,4,128,1,float16,fp8,0,0.017605333278576534
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,64,8,128,1,float16,float16,0,0.014842666685581207
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,64,8,128,1,float16,fp8,0,0.01722666621208191
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,64,64,128,1,float16,fp8,0,0.015002666662136713
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,64,2,128,1,float16,float16,0,0.014842666685581207
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,64,64,128,1,float16,float16,0,0.013882666826248169
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,64,1,128,1,float16,float16,0,0.014874666929244995
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,64,1,128,1,float16,fp8,0,0.01703466723362605
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,64,2,128,1,float16,fp8,0,0.01732800031701724
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,64,4,128,1,float16,float16,0,0.014890667051076889
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,64,4,128,1,float16,fp8,0,0.017301333447297413
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,64,8,128,1,float16,float16,0,0.014426667243242264
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,64,8,128,1,float16,fp8,0,0.017184000462293625
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,64,64,128,1,float16,float16,0,0.013461332768201828
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,64,64,128,1,float16,fp8,0,0.014831999937693277
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,64,1,128,1,float16,float16,0,0.01404800017674764
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,64,1,128,1,float16,fp8,0,0.01617066686352094
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,64,2,128,1,float16,float16,0,0.014554666976133982
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,64,2,128,1,float16,fp8,0,0.016122666498025257
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,64,4,128,1,float16,float16,0,0.014080000420411428
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,64,4,128,1,float16,fp8,0,0.01628799984852473
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,64,8,128,1,float16,float16,0,0.014293332894643148
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,64,8,128,1,float16,fp8,0,0.016149333367745083
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,64,1,128,1,float16,float16,0,0.11493333180745442
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,64,1,128,1,float16,fp8,0,0.13501866658528647
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,64,2,128,1,float16,fp8,0,0.14095999797185263
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,64,4,128,1,float16,float16,0,0.1397599975268046
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,64,64,128,1,float16,float16,0,0.2722986737887065
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,64,8,128,1,float16,float16,0,0.12428266803423564
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,64,2,128,1,float16,float16,0,0.12193066875139873
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,64,8,128,1,float16,fp8,0,0.14752533038457236
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,64,64,128,1,float16,fp8,0,0.26241066058476764
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,64,1,128,1,float16,float16,0,0.06639466683069865
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,64,1,128,1,float16,fp8,0,0.07650133470694225
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,64,4,128,1,float16,fp8,0,0.16243732968966165
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,64,4,128,1,float16,float16,0,0.07876800000667572
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,64,2,128,1,float16,float16,0,0.0703893353541692
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,64,2,128,1,float16,fp8,0,0.07970133423805237
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,64,4,128,1,float16,fp8,0,0.08967999617258708
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,64,8,128,1,float16,fp8,0,0.0819413314263026
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,64,64,128,1,float16,float16,0,0.1420799990495046
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,64,1,128,1,float16,float16,0,0.033029332756996155
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,64,8,128,1,float16,float16,0,0.06977599859237671
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,64,64,128,1,float16,fp8,0,0.13827199737230936
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,64,1,128,1,float16,fp8,0,0.037263999382654824
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,64,2,128,1,float16,float16,0,0.03450666616360346
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,64,2,128,1,float16,fp8,0,0.03941333293914795
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,64,4,128,1,float16,float16,0,0.03956799954175949
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,64,8,128,1,float16,float16,0,0.03603733330965042
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,64,4,128,1,float16,fp8,0,0.04281599819660187
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,64,8,128,1,float16,fp8,0,0.041306667029857635
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,64,64,128,1,float16,fp8,0,0.07046933472156525
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,64,64,128,1,float16,float16,0,0.07574933270613353
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,64,1,128,1,float16,float16,0,0.021125334004561108
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,64,1,128,1,float16,fp8,0,0.024613333245118458
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,64,2,128,1,float16,float16,0,0.02109866589307785
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,64,2,128,1,float16,fp8,0,0.024613333245118458
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,64,4,128,1,float16,float16,0,0.022389332453409832
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,64,4,128,1,float16,fp8,0,0.026357332865397137
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,64,8,128,1,float16,float16,0,0.02204799900452296
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,64,8,128,1,float16,fp8,0,0.02587733417749405
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,64,64,128,1,float16,float16,0,0.038005332152048744
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,64,64,128,1,float16,fp8,0,0.0383840004603068
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,64,1,128,1,float16,float16,0,0.01523200049996376
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,64,1,128,1,float16,fp8,0,0.01810666670401891
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,64,2,128,1,float16,float16,0,0.015370666980743408
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,64,2,128,1,float16,fp8,0,0.018144000321626663
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,64,8,128,1,float16,fp8,0,0.017792000124851864
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,64,4,128,1,float16,float16,0,0.015578666081031164
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,64,4,128,1,float16,fp8,0,0.018218666315078735
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,64,8,128,1,float16,float16,0,0.015471999843915304
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,64,64,128,1,float16,float16,0,0.024266667664051056
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,64,64,128,1,float16,fp8,0,0.02534399926662445
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,64,1,128,1,float16,float16,0,0.014885333677132925
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,64,1,128,1,float16,fp8,0,0.017909333109855652
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,64,2,128,1,float16,float16,0,0.015098666151364645
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,64,4,128,1,float16,float16,0,0.014954666296641031
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,64,2,128,1,float16,fp8,0,0.017583999782800674
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,64,4,128,1,float16,fp8,0,0.01782400036851565
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,64,8,128,1,float16,float16,0,0.014549333602190018
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,64,8,128,1,float16,fp8,0,0.0174346665541331
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,64,64,128,1,float16,float16,0,0.01738133281469345
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,64,64,128,1,float16,fp8,0,0.0186666672428449
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,64,1,128,1,float16,float16,0,0.014789332946141561
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,64,1,128,1,float16,fp8,0,0.01716800034046173
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,64,2,128,1,float16,float16,0,0.014602666099866232
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,64,2,128,1,float16,fp8,0,0.01720000058412552
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,64,64,128,1,float16,float16,0,0.013829333086808523
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,64,4,128,1,float16,float16,0,0.01504533365368843
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,64,4,128,1,float16,fp8,0,0.017407999684413273
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,64,8,128,1,float16,float16,0,0.014293332894643148
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,64,8,128,1,float16,fp8,0,0.016965333372354507
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,64,64,128,1,float16,fp8,0,0.014938666174809137
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,64,1,128,1,float16,float16,0,0.013951999445756277
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,64,1,128,1,float16,fp8,0,0.01605333387851715
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,64,2,128,1,float16,float16,0,0.0141546664138635
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,64,2,128,1,float16,fp8,0,0.01602666700879733
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,64,4,128,1,float16,float16,0,0.014192000031471252
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,64,4,128,1,float16,fp8,0,0.016165333489576977
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,64,8,128,1,float16,float16,0,0.013797332843144735
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,64,8,128,1,float16,fp8,0,0.016544000556071598
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,64,64,128,1,float16,fp8,0,0.014384000251690546
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,64,64,128,1,float16,float16,0,0.01350933313369751
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,64,1,128,1,float16,float16,0,0.013807999591032663
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,64,1,128,1,float16,fp8,0,0.015749332805474598
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,64,2,128,1,float16,float16,0,0.013914667069911957
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,64,2,128,1,float16,fp8,0,0.016016000260909397
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,64,4,128,1,float16,float16,0,0.013850666582584381
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,64,4,128,1,float16,fp8,0,0.016000000139077503
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,64,8,128,1,float16,float16,0,0.01339200014869372
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,64,8,128,1,float16,fp8,0,0.015562667200962702
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,48,1,128,1,float16,fp8,0,41.60210673014323
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,48,2,128,1,float16,fp8,0,41.91437784830729
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,48,4,128,1,float16,fp8,0,42.55753072102865
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,48,8,128,1,float16,fp8,0,42.55584971110026
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,48,1,128,1,float16,float16,0,47.68255106608073
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,48,2,128,1,float16,float16,0,47.1726328531901
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,48,4,128,1,float16,float16,0,47.34564208984375
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,48,8,128,1,float16,float16,0,48.786163330078125
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,48,48,128,1,float16,fp8,0,22.260111490885418
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,48,48,128,1,float16,float16,0,24.79596710205078
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,48,1,128,1,float16,fp8,0,20.983221689860027
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,48,1,128,1,float16,float16,0,23.788052876790363
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,48,2,128,1,float16,fp8,0,20.955530802408855
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,48,2,128,1,float16,float16,0,23.63097635904948
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,48,4,128,1,float16,float16,0,24.01482645670573
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,48,4,128,1,float16,fp8,0,21.330623626708984
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,48,48,128,1,float16,float16,0,12.996901194254557
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,48,48,128,1,float16,fp8,0,11.402080535888672
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,48,1,128,1,float16,float16,0,11.862111409505209
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,48,1,128,1,float16,fp8,0,10.728266398111979
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,48,2,128,1,float16,fp8,0,10.78981908162435
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,48,2,128,1,float16,float16,0,12.513914744059244
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,48,8,128,1,float16,fp8,0,21.882059733072918
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,48,48,128,1,float16,float16,0,6.03761609395345
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,48,8,128,1,float16,float16,0,26.658533732096355
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,48,48,128,1,float16,fp8,0,5.877386728922526
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,48,4,128,1,float16,fp8,0,10.737412770589193
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,48,4,128,1,float16,float16,0,12.217381795247396
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,48,8,128,1,float16,fp8,0,10.748560587565104
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,48,8,128,1,float16,float16,0,12.039477030436197
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,48,1,128,1,float16,float16,0,5.572181065877278
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,48,1,128,1,float16,fp8,0,5.8223520914713545
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,48,2,128,1,float16,float16,0,5.843823750813802
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,48,2,128,1,float16,fp8,0,5.343482971191406
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,48,4,128,1,float16,float16,0,5.815439860026042
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,48,8,128,1,float16,float16,0,5.741167704264323
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,48,4,128,1,float16,fp8,0,5.829514821370442
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,48,8,128,1,float16,fp8,0,5.670746485392253
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,48,1,128,1,float16,fp8,0,23.822240193684895
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,48,1,128,1,float16,float16,0,27.102554321289062
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,48,2,128,1,float16,fp8,0,23.984458923339844
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,48,2,128,1,float16,float16,0,26.806859334309895
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,48,4,128,1,float16,fp8,0,24.52696990966797
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,48,4,128,1,float16,float16,0,27.3362553914388
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,48,8,128,1,float16,float16,0,27.50670878092448
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,48,48,128,1,float16,float16,0,14.256938934326172
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,48,1,128,1,float16,float16,0,13.440085093180338
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,48,1,128,1,float16,fp8,0,12.034506479899088
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,48,48,128,1,float16,fp8,0,12.933685302734375
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,48,2,128,1,float16,float16,0,13.736186981201172
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,48,8,128,1,float16,fp8,0,24.623311360677082
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,48,2,128,1,float16,fp8,0,12.034805297851562
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,48,4,128,1,float16,float16,0,13.698650360107422
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,48,48,128,1,float16,float16,0,6.805434544881185
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,48,48,128,1,float16,fp8,0,6.581653594970703
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,48,1,128,1,float16,fp8,0,6.051749547322591
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,48,1,128,1,float16,float16,0,6.282218933105469
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,48,2,128,1,float16,float16,0,6.465242385864258
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,48,4,128,1,float16,fp8,0,12.38750966389974
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,48,8,128,1,float16,fp8,0,12.342842102050781
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,48,8,128,1,float16,float16,0,13.767888387044271
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,48,4,128,1,float16,float16,0,6.646874745686849
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,48,2,128,1,float16,fp8,0,6.282538731892903
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,48,48,128,1,float16,float16,0,3.2981866200764975
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,48,4,128,1,float16,fp8,0,6.19700813293457
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,48,48,128,1,float16,fp8,0,3.3295253117879233
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,48,8,128,1,float16,float16,0,6.721984227498372
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,48,1,128,1,float16,float16,0,3.163263956705729
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,48,8,128,1,float16,fp8,0,6.402463912963867
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,48,1,128,1,float16,fp8,0,3.4091361363728843
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,48,2,128,1,float16,float16,0,3.064864158630371
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,48,2,128,1,float16,fp8,0,3.22653865814209
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,48,4,128,1,float16,float16,0,3.108250617980957
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,48,4,128,1,float16,fp8,0,3.1321067810058594
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,48,8,128,1,float16,float16,0,3.4730612436930337
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,48,8,128,1,float16,fp8,0,3.260085423787435
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,48,1,128,1,float16,fp8,0,16.783915201822918
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,48,1,128,1,float16,float16,0,18.959679921468098
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,48,2,128,1,float16,fp8,0,16.90013376871745
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,48,2,128,1,float16,float16,0,18.982624053955078
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,48,4,128,1,float16,fp8,0,17.206858317057293
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,48,4,128,1,float16,float16,0,19.24990463256836
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,48,8,128,1,float16,fp8,0,17.37046941121419
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,48,8,128,1,float16,float16,0,19.419376373291016
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,48,1,128,1,float16,float16,0,9.072506586710611
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,48,48,128,1,float16,float16,0,10.149424235026041
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,48,48,128,1,float16,fp8,0,9.375343958536783
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,48,1,128,1,float16,fp8,0,8.529797236124674
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,48,2,128,1,float16,float16,0,9.416293462117514
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,48,2,128,1,float16,fp8,0,8.654031753540039
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,48,4,128,1,float16,float16,0,9.435487747192383
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,48,4,128,1,float16,fp8,0,8.885632197062174
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,48,48,128,1,float16,fp8,0,4.690773328145345
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,48,48,128,1,float16,float16,0,4.609509468078613
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,48,1,128,1,float16,float16,0,4.400229454040527
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,48,1,128,1,float16,fp8,0,4.268869400024414
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,48,2,128,1,float16,float16,0,4.514896074930827
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,48,2,128,1,float16,fp8,0,4.293125470479329
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,48,8,128,1,float16,float16,0,9.543749491373697
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,48,8,128,1,float16,fp8,0,8.743050893147787
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,48,4,128,1,float16,float16,0,4.4903411865234375
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,48,48,128,1,float16,float16,0,2.3813066482543945
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,48,8,128,1,float16,float16,0,4.497056007385254
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,48,48,128,1,float16,fp8,0,2.38864533106486
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,48,1,128,1,float16,float16,0,2.4300427436828613
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,48,4,128,1,float16,fp8,0,4.488293329874675
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,48,8,128,1,float16,fp8,0,4.42299747467041
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,48,1,128,1,float16,fp8,0,2.1753652890523276
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,48,2,128,1,float16,float16,0,2.1943839391072593
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,48,2,128,1,float16,fp8,0,2.2151467005411782
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,48,4,128,1,float16,float16,0,2.242970625559489
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,48,4,128,1,float16,fp8,0,2.2490879694620767
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,48,8,128,1,float16,float16,0,2.22981325785319
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,48,8,128,1,float16,fp8,0,2.2584266662597656
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,48,1,128,1,float16,fp8,0,21.920740763346355
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,48,2,128,1,float16,fp8,0,22.083343505859375
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,48,1,128,1,float16,float16,0,24.39312998453776
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,48,2,128,1,float16,float16,0,24.51597849527995
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,48,4,128,1,float16,float16,0,24.95806376139323
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,48,4,128,1,float16,fp8,0,22.6735102335612
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,48,8,128,1,float16,fp8,0,22.738011678059895
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,48,8,128,1,float16,float16,0,25.324203491210938
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,48,48,128,1,float16,float16,0,13.310282389322916
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,48,1,128,1,float16,float16,0,12.279535929361979
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,48,1,128,1,float16,fp8,0,11.056976318359375
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,48,48,128,1,float16,fp8,0,12.290149688720703
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,48,2,128,1,float16,float16,0,12.498106638590494
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,48,2,128,1,float16,fp8,0,11.058629353841146
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,48,4,128,1,float16,float16,0,12.572612762451172
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,48,4,128,1,float16,fp8,0,11.480400085449219
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,48,48,128,1,float16,float16,0,6.331765492757161
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,48,48,128,1,float16,fp8,0,6.154309590657552
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,48,1,128,1,float16,float16,0,5.883077621459961
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,48,1,128,1,float16,fp8,0,5.536538441975911
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,48,2,128,1,float16,float16,0,5.785743713378906
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,48,2,128,1,float16,fp8,0,5.5830078125
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,48,8,128,1,float16,fp8,0,11.523333231608072
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,48,8,128,1,float16,float16,0,12.779605865478516
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,48,4,128,1,float16,float16,0,5.8332265218098955
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,48,48,128,1,float16,float16,0,3.0420586268107095
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,48,4,128,1,float16,fp8,0,5.915280024210612
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,48,8,128,1,float16,float16,0,6.17739741007487
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,48,48,128,1,float16,fp8,0,3.1392319997151694
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,48,1,128,1,float16,float16,0,3.1499414443969727
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,48,8,128,1,float16,fp8,0,6.043327967325847
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,48,1,128,1,float16,fp8,0,2.907888094584147
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,48,2,128,1,float16,float16,0,2.9120000203450522
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,48,2,128,1,float16,fp8,0,2.835551897684733
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,48,4,128,1,float16,float16,0,2.952325185139974
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,48,4,128,1,float16,fp8,0,2.929423967997233
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,48,8,128,1,float16,float16,0,2.8791945775349936
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,48,48,128,1,float16,float16,0,1.5419519742329915
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,48,8,128,1,float16,fp8,0,2.9408960342407227
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,48,1,128,1,float16,float16,0,1.39081605275472
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,48,48,128,1,float16,fp8,0,1.7729493776957195
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,48,1,128,1,float16,fp8,0,1.607701301574707
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,48,2,128,1,float16,float16,0,1.4241174062093098
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,48,2,128,1,float16,fp8,0,1.5547946294148762
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,48,4,128,1,float16,fp8,0,1.4987252553304036
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,48,4,128,1,float16,float16,0,1.450042724609375
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,48,8,128,1,float16,float16,0,1.4454827308654785
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,48,8,128,1,float16,fp8,0,1.530677318572998
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,48,1,128,1,float16,fp8,0,12.753397623697916
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,48,1,128,1,float16,float16,0,14.345530192057291
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,48,2,128,1,float16,float16,0,14.383338928222656
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,48,2,128,1,float16,fp8,0,12.892191569010416
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,48,4,128,1,float16,float16,0,14.508800506591797
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,48,8,128,1,float16,fp8,0,13.412427266438803
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,48,8,128,1,float16,float16,0,14.607701619466146
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,48,4,128,1,float16,fp8,0,13.330980936686197
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,48,48,128,1,float16,float16,0,7.603439966837565
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,48,1,128,1,float16,fp8,0,6.391477584838867
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,48,1,128,1,float16,float16,0,7.076234817504883
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,48,48,128,1,float16,fp8,0,7.413210550944011
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,48,2,128,1,float16,float16,0,6.853989283243815
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,48,2,128,1,float16,fp8,0,6.468544006347656
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,48,4,128,1,float16,float16,0,7.074352264404297
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,48,4,128,1,float16,fp8,0,6.716047922770183
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,48,48,128,1,float16,float16,0,3.6793174743652344
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,48,1,128,1,float16,float16,0,3.530405362447103
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,48,1,128,1,float16,fp8,0,3.217514673868815
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,48,48,128,1,float16,fp8,0,3.726016044616699
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,48,2,128,1,float16,float16,0,3.2579520543416343
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,48,8,128,1,float16,fp8,0,6.712666829427083
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,48,8,128,1,float16,float16,0,7.249477386474609
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,48,2,128,1,float16,fp8,0,3.24510924021403
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,48,48,128,1,float16,float16,0,1.8191253344217937
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,48,4,128,1,float16,float16,0,3.364325205485026
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,48,8,128,1,float16,float16,0,3.32692813873291
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,48,4,128,1,float16,fp8,0,3.708762804667155
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,48,1,128,1,float16,float16,0,1.674341360727946
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,48,48,128,1,float16,fp8,0,1.8885653813680012
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,48,8,128,1,float16,fp8,0,3.4351733525594077
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,48,1,128,1,float16,fp8,0,1.6384906768798828
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,48,2,128,1,float16,float16,0,1.6043893496195476
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,48,2,128,1,float16,fp8,0,1.7182505925496419
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,48,4,128,1,float16,float16,0,1.7246613502502441
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,48,4,128,1,float16,fp8,0,1.7637972831726074
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,48,8,128,1,float16,float16,0,1.7039252916971843
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,48,48,128,1,float16,fp8,0,0.9822506904602051
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,48,1,128,1,float16,float16,0,0.8172106742858887
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,48,48,128,1,float16,float16,0,0.9362186590830485
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,48,8,128,1,float16,fp8,0,1.7408426602681477
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,48,1,128,1,float16,fp8,0,0.8630452950795492
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,48,2,128,1,float16,float16,0,0.8503733476003011
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,48,2,128,1,float16,fp8,0,0.8668426672617594
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,48,4,128,1,float16,float16,0,0.8630346457163492
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,48,4,128,1,float16,fp8,0,0.8997653325398763
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,48,8,128,1,float16,float16,0,0.8563573360443115
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,48,8,128,1,float16,fp8,0,0.9054293632507324
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,48,1,128,1,float16,float16,0,13.07470957438151
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,48,1,128,1,float16,fp8,0,12.097296396891275
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,48,2,128,1,float16,float16,0,13.512816111246744
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,48,2,128,1,float16,fp8,0,12.233786265055338
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,48,4,128,1,float16,float16,0,13.879311879475912
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,48,8,128,1,float16,fp8,0,12.876762390136719
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,48,8,128,1,float16,float16,0,13.898085276285807
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,48,4,128,1,float16,fp8,0,12.807076772054037
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,48,1,128,1,float16,fp8,0,6.0383148193359375
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,48,1,128,1,float16,float16,0,6.297829310099284
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,48,48,128,1,float16,float16,0,7.252810796101888
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,48,48,128,1,float16,fp8,0,7.337781270345052
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,48,2,128,1,float16,float16,0,6.407050450642903
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,48,2,128,1,float16,fp8,0,6.078122456868489
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,48,4,128,1,float16,float16,0,6.667626698811849
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,48,4,128,1,float16,fp8,0,6.413839975992839
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,48,48,128,1,float16,float16,0,3.545210520426432
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,48,1,128,1,float16,float16,0,3.0510079065958657
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,48,48,128,1,float16,fp8,0,3.68015988667806
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,48,1,128,1,float16,fp8,0,3.0250507990519204
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,48,2,128,1,float16,float16,0,3.1057653427124023
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,48,8,128,1,float16,float16,0,6.5430450439453125
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,48,2,128,1,float16,fp8,0,3.058490753173828
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,48,8,128,1,float16,fp8,0,6.641824086507161
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,48,48,128,1,float16,float16,0,1.748538653055827
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,48,4,128,1,float16,float16,0,3.1833972930908203
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,48,8,128,1,float16,float16,0,3.1872854232788086
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,48,4,128,1,float16,fp8,0,3.3760480880737305
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,48,8,128,1,float16,fp8,0,3.2809438705444336
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,48,1,128,1,float16,float16,0,1.4868106842041016
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,48,48,128,1,float16,fp8,0,1.8558185895284016
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,48,1,128,1,float16,fp8,0,1.5723412831624348
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,48,2,128,1,float16,float16,0,1.5276320775349934
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,48,2,128,1,float16,fp8,0,1.6148853302001953
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,48,4,128,1,float16,float16,0,1.5975786844889324
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,48,4,128,1,float16,fp8,0,1.6489653587341309
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,48,8,128,1,float16,float16,0,1.596901257832845
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,48,48,128,1,float16,float16,0,0.8947306474049886
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,48,1,128,1,float16,float16,0,0.7656319936116537
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,48,8,128,1,float16,fp8,0,1.6734612782796223
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,48,1,128,1,float16,fp8,0,0.8084746996561686
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,48,48,128,1,float16,fp8,0,0.9762559731801351
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,48,2,128,1,float16,float16,0,0.7718506654103597
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,48,2,128,1,float16,fp8,0,0.8140160242716471
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,48,4,128,1,float16,float16,0,0.8022879759470621
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,48,4,128,1,float16,fp8,0,0.8677066961924235
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,48,8,128,1,float16,float16,0,0.8279253641764323
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,48,8,128,1,float16,fp8,0,0.8683040142059326
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,48,48,128,1,float16,fp8,0,0.5065653324127197
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,48,48,128,1,float16,float16,0,0.47489066918691
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,48,1,128,1,float16,float16,0,0.4020693302154541
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,48,1,128,1,float16,fp8,0,0.4226826826731364
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,48,2,128,1,float16,float16,0,0.40706666310628253
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,48,4,128,1,float16,float16,0,0.4230560064315796
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,48,2,128,1,float16,fp8,0,0.43030401070912677
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,48,4,128,1,float16,fp8,0,0.44779733816782635
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,48,8,128,1,float16,float16,0,0.4294346570968628
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,48,8,128,1,float16,fp8,0,0.45958399772644043
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,48,1,128,1,float16,float16,0,7.586517333984375
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,48,1,128,1,float16,fp8,0,7.239472071329753
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,48,2,128,1,float16,float16,0,7.857919692993164
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,48,4,128,1,float16,float16,0,8.008848190307617
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,48,4,128,1,float16,fp8,0,7.92083740234375
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,48,8,128,1,float16,float16,0,8.234895706176758
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,48,2,128,1,float16,fp8,0,7.284677505493164
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,48,1,128,1,float16,float16,0,3.4966611862182617
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,48,48,128,1,float16,float16,0,4.237829208374023
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,48,48,128,1,float16,fp8,0,4.613173484802246
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,48,1,128,1,float16,fp8,0,4.092965443929036
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,48,2,128,1,float16,float16,0,3.597914695739746
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,48,2,128,1,float16,fp8,0,3.862208048502604
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,48,4,128,1,float16,float16,0,3.81659730275472
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,48,8,128,1,float16,fp8,0,7.945514678955078
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,48,48,128,1,float16,float16,0,2.1481332778930664
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,48,8,128,1,float16,float16,0,3.9026667277018228
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,48,48,128,1,float16,fp8,0,2.4712160428365073
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,48,1,128,1,float16,float16,0,1.7453120549519856
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,48,4,128,1,float16,fp8,0,4.155962626139323
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,48,8,128,1,float16,fp8,0,3.9784374237060547
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,48,1,128,1,float16,fp8,0,1.9588160514831543
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,48,2,128,1,float16,float16,0,1.7657705942789714
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,48,2,128,1,float16,fp8,0,1.8712746302286785
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,48,4,128,1,float16,float16,0,1.922650655110677
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,48,4,128,1,float16,fp8,0,2.0600372950236
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,48,8,128,1,float16,float16,0,1.906010627746582
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,48,48,128,1,float16,float16,0,1.095146656036377
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,48,8,128,1,float16,fp8,0,2.031402587890625
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,48,48,128,1,float16,fp8,0,1.213765303293864
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,48,1,128,1,float16,float16,0,0.8849226633707682
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,48,1,128,1,float16,fp8,0,0.9407733281453451
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,48,2,128,1,float16,float16,0,0.9069706598917643
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,48,2,128,1,float16,fp8,0,0.9558666547139486
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,48,4,128,1,float16,float16,0,0.9593280156453451
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,48,8,128,1,float16,fp8,0,1.0434292952219646
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,48,4,128,1,float16,fp8,0,1.025487979253133
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,48,1,128,1,float16,float16,0,0.4567360083262126
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,48,48,128,1,float16,fp8,0,0.6203039884567261
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,48,8,128,1,float16,float16,0,0.9735413392384847
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,48,48,128,1,float16,float16,0,0.5569279988606771
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,48,2,128,1,float16,fp8,0,0.5002986590067545
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,48,1,128,1,float16,fp8,0,0.4912000099817912
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,48,2,128,1,float16,float16,0,0.4718720118204753
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,48,4,128,1,float16,float16,0,0.49459731578826904
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,48,4,128,1,float16,fp8,0,0.5392853418986002
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,48,8,128,1,float16,float16,0,0.5028693278630575
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,48,48,128,1,float16,float16,0,0.2997973362604777
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,48,8,128,1,float16,fp8,0,0.5443146626154581
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,48,48,128,1,float16,fp8,0,0.3335040012995402
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,48,1,128,1,float16,float16,0,0.24574933449427286
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,48,1,128,1,float16,fp8,0,0.2643253405888875
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,48,2,128,1,float16,float16,0,0.2513813376426697
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,48,2,128,1,float16,fp8,0,0.2709226608276367
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,48,4,128,1,float16,float16,0,0.2640960017840068
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,48,8,128,1,float16,float16,0,0.2701279918352763
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,48,4,128,1,float16,fp8,0,0.29050666093826294
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,48,8,128,1,float16,fp8,0,0.29494933287302655
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,48,1,128,1,float16,float16,0,7.2324269612630205
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,48,1,128,1,float16,fp8,0,7.249327977498372
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,48,2,128,1,float16,float16,0,7.416666666666667
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,48,2,128,1,float16,fp8,0,7.337381362915039
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,48,4,128,1,float16,float16,0,7.949296315511067
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,48,4,128,1,float16,fp8,0,8.008458455403646
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,48,8,128,1,float16,fp8,0,8.058789571126303
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,48,48,128,1,float16,float16,0,4.4318132400512695
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,48,1,128,1,float16,float16,0,3.40611203511556
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,48,1,128,1,float16,fp8,0,3.6185121536254883
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,48,48,128,1,float16,fp8,0,4.878160158793132
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,48,2,128,1,float16,float16,0,3.613493283589681
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,48,8,128,1,float16,float16,0,7.826586405436198
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,48,2,128,1,float16,fp8,0,3.6671040852864585
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,48,4,128,1,float16,float16,0,3.774474779764811
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,48,48,128,1,float16,float16,0,2.2217119534810386
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,48,4,128,1,float16,fp8,0,4.127776145935059
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,48,1,128,1,float16,float16,0,1.6981493631998699
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,48,8,128,1,float16,float16,0,3.8130613962809243
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,48,48,128,1,float16,fp8,0,2.5285654067993164
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,48,8,128,1,float16,fp8,0,4.065258661905925
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,48,2,128,1,float16,float16,0,1.7589866320292156
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,48,2,128,1,float16,fp8,0,1.8595306078592937
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,48,1,128,1,float16,fp8,0,1.8884053230285645
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,48,4,128,1,float16,float16,0,1.8951412836710613
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,48,4,128,1,float16,fp8,0,2.0718560218811035
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,48,8,128,1,float16,float16,0,1.8843146959940593
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,48,8,128,1,float16,fp8,0,2.0600266456604004
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,48,48,128,1,float16,float16,0,1.1283306280771892
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,48,48,128,1,float16,fp8,0,1.2502506573994954
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,48,1,128,1,float16,float16,0,0.8696533044179281
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,48,1,128,1,float16,fp8,0,0.9367360273996989
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,48,2,128,1,float16,float16,0,0.8904053370157877
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,48,2,128,1,float16,fp8,0,0.9475626945495605
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,48,4,128,1,float16,float16,0,0.9556372960408529
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,48,8,128,1,float16,float16,0,0.9687893390655518
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,48,4,128,1,float16,fp8,0,1.0421600341796875
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,48,48,128,1,float16,float16,0,0.5789386828740438
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,48,8,128,1,float16,fp8,0,1.0582186381022136
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,48,48,128,1,float16,fp8,0,0.6464853286743164
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,48,1,128,1,float16,float16,0,0.4450560013453166
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,48,1,128,1,float16,fp8,0,0.4804160197575887
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,48,2,128,1,float16,float16,0,0.4575786590576172
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,48,2,128,1,float16,fp8,0,0.49244264761606854
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,48,4,128,1,float16,float16,0,0.49540265401204425
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,48,4,128,1,float16,fp8,0,0.5400160153706869
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,48,8,128,1,float16,float16,0,0.4962453444798787
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,48,48,128,1,float16,float16,0,0.3036800026893616
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,48,8,128,1,float16,fp8,0,0.5482986768086752
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,48,48,128,1,float16,fp8,0,0.3413173357645671
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,48,1,128,1,float16,fp8,0,0.2600799997647603
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,48,2,128,1,float16,float16,0,0.24464533726374307
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,48,1,128,1,float16,float16,0,0.23555733760197958
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,48,2,128,1,float16,fp8,0,0.26401599248250324
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,48,4,128,1,float16,float16,0,0.2627413272857666
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,48,4,128,1,float16,fp8,0,0.286789337793986
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,48,8,128,1,float16,float16,0,0.26440000534057617
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,48,8,128,1,float16,fp8,0,0.291157325108846
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,48,2,128,1,float16,float16,0,0.13451733191808066
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,48,48,128,1,float16,float16,0,0.1688800056775411
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,48,48,128,1,float16,fp8,0,0.18864534298578897
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,48,4,128,1,float16,fp8,0,0.15759999553362528
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,48,1,128,1,float16,float16,0,0.12998400131861368
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,48,1,128,1,float16,fp8,0,0.1410719950993856
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,48,2,128,1,float16,fp8,0,0.1463466684023539
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,48,4,128,1,float16,float16,0,0.14497599999109903
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,48,8,128,1,float16,float16,0,0.1477013329664866
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,48,8,128,1,float16,fp8,0,0.16208533445994058
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,48,1,128,1,float16,fp8,0,4.459786732991536
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,48,1,128,1,float16,float16,0,4.145626703898112
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,48,4,128,1,float16,float16,0,4.669429461161296
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,48,2,128,1,float16,fp8,0,4.63318411509196
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,48,8,128,1,float16,float16,0,4.689130783081055
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,48,4,128,1,float16,fp8,0,5.080933252970378
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,48,8,128,1,float16,fp8,0,5.105632146199544
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,48,1,128,1,float16,float16,0,2.0889172554016113
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,48,48,128,1,float16,float16,0,2.861525217692057
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,48,2,128,1,float16,float16,0,4.258213361104329
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,48,1,128,1,float16,fp8,0,2.2777439753214517
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,48,48,128,1,float16,fp8,0,3.334970792134603
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,48,2,128,1,float16,fp8,0,2.299701372782389
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,48,2,128,1,float16,float16,0,2.211568037668864
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,48,4,128,1,float16,float16,0,2.3501866658528647
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,48,4,128,1,float16,fp8,0,2.605327924092611
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,48,8,128,1,float16,float16,0,2.3967199325561523
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,48,48,128,1,float16,float16,0,1.4321974118550618
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,48,48,128,1,float16,fp8,0,1.647322654724121
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,48,1,128,1,float16,float16,0,1.033018668492635
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,48,1,128,1,float16,fp8,0,1.1516693433125813
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,48,2,128,1,float16,float16,0,1.0788373152414958
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,48,8,128,1,float16,fp8,0,2.583418687184652
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,48,2,128,1,float16,fp8,0,1.1780532995859783
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,48,4,128,1,float16,float16,0,1.1933066844940186
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,48,4,128,1,float16,fp8,0,1.3170613447825115
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,48,8,128,1,float16,float16,0,1.2186826864878337
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,48,48,128,1,float16,float16,0,0.7239147027333578
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,48,8,128,1,float16,fp8,0,1.3360212643941243
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,48,48,128,1,float16,fp8,0,0.8344159921010336
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,48,1,128,1,float16,float16,0,0.5315200090408325
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,48,1,128,1,float16,fp8,0,0.5910773277282715
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,48,2,128,1,float16,float16,0,0.5603786706924438
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,48,4,128,1,float16,float16,0,0.6142133474349976
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,48,2,128,1,float16,fp8,0,0.6059786478678385
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,48,4,128,1,float16,fp8,0,0.67575470606486
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,48,8,128,1,float16,fp8,0,0.6870826880137125
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,48,8,128,1,float16,float16,0,0.6124906539916992
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,48,48,128,1,float16,float16,0,0.3784639835357666
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,48,2,128,1,float16,fp8,0,0.3198026617368062
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,48,48,128,1,float16,fp8,0,0.4360320170720418
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,48,1,128,1,float16,float16,0,0.27988799413045246
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,48,1,128,1,float16,fp8,0,0.3112586736679077
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,48,4,128,1,float16,fp8,0,0.35257065296173096
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,48,2,128,1,float16,float16,0,0.29605867465337116
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,48,4,128,1,float16,float16,0,0.31777065992355347
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,48,8,128,1,float16,float16,0,0.32441065708796185
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,48,8,128,1,float16,fp8,0,0.3651839892069499
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,48,48,128,1,float16,float16,0,0.2046239972114563
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,48,48,128,1,float16,fp8,0,0.23468265930811563
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,48,1,128,1,float16,float16,0,0.1534986694653829
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,48,1,128,1,float16,fp8,0,0.17137600978215536
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,48,2,128,1,float16,float16,0,0.15917866428693137
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,48,2,128,1,float16,fp8,0,0.17583467562993368
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,48,4,128,1,float16,float16,0,0.175327996412913
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,48,8,128,1,float16,float16,0,0.17706133921941122
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,48,48,128,1,float16,fp8,0,0.13291200002034506
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,48,4,128,1,float16,fp8,0,0.19208000103632608
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,48,48,128,1,float16,float16,0,0.11479999621709187
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,48,8,128,1,float16,fp8,0,0.19528534015019736
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,48,1,128,1,float16,float16,0,0.0863200028737386
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,48,1,128,1,float16,fp8,0,0.09623466928799947
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,48,2,128,1,float16,float16,0,0.08805333574612935
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,48,2,128,1,float16,fp8,0,0.09568533301353455
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,48,4,128,1,float16,float16,0,0.09550933043162028
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,48,4,128,1,float16,fp8,0,0.10518933335940044
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,48,8,128,1,float16,float16,0,0.0983840028444926
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,48,8,128,1,float16,fp8,0,0.10723732908566792
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,48,1,128,1,float16,float16,0,4.327141443888347
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,48,1,128,1,float16,fp8,0,4.730181376139323
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,48,2,128,1,float16,float16,0,4.433322588602702
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,48,8,128,1,float16,float16,0,5.037386576334636
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,48,4,128,1,float16,fp8,0,5.601861317952474
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,48,48,128,1,float16,float16,0,3.1629225413004556
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,48,8,128,1,float16,fp8,0,5.727461496988933
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,48,2,128,1,float16,fp8,0,4.907423973083496
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,48,1,128,1,float16,float16,0,2.2142133712768555
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,48,1,128,1,float16,fp8,0,2.4060212771097818
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,48,48,128,1,float16,fp8,0,3.7409706115722656
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,48,4,128,1,float16,fp8,0,2.811386744181315
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,48,2,128,1,float16,fp8,0,2.4975040753682456
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,48,4,128,1,float16,float16,0,5.050527890523274
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,48,48,128,1,float16,float16,0,1.6127893129984539
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,48,8,128,1,float16,float16,0,2.5405386288960776
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,48,8,128,1,float16,fp8,0,2.9012158711751304
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,48,2,128,1,float16,float16,0,2.2362987200419107
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,48,2,128,1,float16,fp8,0,1.2545119921366374
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,48,1,128,1,float16,float16,0,1.0992639859517415
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,48,4,128,1,float16,float16,0,2.5416746139526367
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,48,2,128,1,float16,float16,0,1.1474346319834392
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,48,48,128,1,float16,fp8,0,1.8907893498738606
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,48,4,128,1,float16,fp8,0,1.4265333811442058
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,48,4,128,1,float16,float16,0,1.2804853121439617
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,48,1,128,1,float16,fp8,0,1.228437344233195
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,48,8,128,1,float16,float16,0,1.2990826765696208
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,48,1,128,1,float16,float16,0,0.5571999947230021
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,48,8,128,1,float16,fp8,0,1.4860906600952148
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,48,48,128,1,float16,float16,0,0.8023040294647217
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,48,48,128,1,float16,fp8,0,0.9611626466115316
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,48,1,128,1,float16,fp8,0,0.6193706591924032
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,48,2,128,1,float16,float16,0,0.590773344039917
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,48,2,128,1,float16,fp8,0,0.6459146738052368
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,48,4,128,1,float16,float16,0,0.6552799940109253
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,48,4,128,1,float16,fp8,0,0.7345813115437826
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,48,8,128,1,float16,float16,0,0.6652586857477824
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,48,8,128,1,float16,fp8,0,0.7462186813354492
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,48,48,128,1,float16,float16,0,0.42081065972646076
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,48,1,128,1,float16,float16,0,0.2919413248697917
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,48,48,128,1,float16,fp8,0,0.49559998512268066
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,48,1,128,1,float16,fp8,0,0.326746662457784
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,48,2,128,1,float16,float16,0,0.30746666590372723
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,48,2,128,1,float16,fp8,0,0.33399466673533124
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,48,4,128,1,float16,float16,0,0.3439679940541585
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,48,48,128,1,float16,fp8,0,0.265557328859965
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,48,8,128,1,float16,float16,0,0.3477333386739095
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,48,1,128,1,float16,float16,0,0.15618133544921875
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,48,8,128,1,float16,fp8,0,0.391973336537679
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,48,48,128,1,float16,float16,0,0.22367999951044717
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,48,4,128,1,float16,fp8,0,0.3833013375600179
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,48,1,128,1,float16,fp8,0,0.17526400089263916
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,48,2,128,1,float16,float16,0,0.1651893357435862
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,48,2,128,1,float16,fp8,0,0.1844373345375061
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,48,8,128,1,float16,fp8,0,0.2105813423792521
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,48,4,128,1,float16,float16,0,0.18278932571411133
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,48,4,128,1,float16,fp8,0,0.20583999156951904
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,48,8,128,1,float16,float16,0,0.1852160096168518
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,48,48,128,1,float16,fp8,0,0.14546666542689005
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,48,48,128,1,float16,float16,0,0.1227839986483256
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,48,1,128,1,float16,float16,0,0.0881866713364919
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,48,1,128,1,float16,fp8,0,0.09990933537483215
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,48,2,128,1,float16,float16,0,0.09133866429328918
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,48,2,128,1,float16,fp8,0,0.10098666946093242
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,48,4,128,1,float16,float16,0,0.09990400075912476
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,48,4,128,1,float16,fp8,0,0.1146399974822998
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,48,8,128,1,float16,float16,0,0.10256000359853108
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,48,8,128,1,float16,fp8,0,0.11733333269755046
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,48,48,128,1,float16,float16,0,0.07089599967002869
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,48,48,128,1,float16,fp8,0,0.08630933364232381
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,48,1,128,1,float16,float16,0,0.048170665899912514
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,48,2,128,1,float16,float16,0,0.05031466484069824
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,48,1,128,1,float16,fp8,0,0.05421333511670431
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,48,2,128,1,float16,fp8,0,0.05842666824658712
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,48,4,128,1,float16,float16,0,0.054416000843048096
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,48,4,128,1,float16,fp8,0,0.06322133541107178
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,48,8,128,1,float16,float16,0,0.05548266569773356
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,48,8,128,1,float16,fp8,0,0.06384000182151794
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,48,1,128,1,float16,float16,0,3.1091521581014
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,48,2,128,1,float16,float16,0,3.3397652308146157
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,48,1,128,1,float16,fp8,0,3.5269492467244468
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,48,2,128,1,float16,fp8,0,3.7403039932250977
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,48,4,128,1,float16,float16,0,3.9145387013753257
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,48,8,128,1,float16,float16,0,3.9197279612223306
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,48,1,128,1,float16,float16,0,1.579370657602946
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,48,48,128,1,float16,float16,0,2.5646026929219565
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,48,48,128,1,float16,fp8,0,3.306037267049154
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,48,1,128,1,float16,fp8,0,1.7910559972127278
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,48,8,128,1,float16,fp8,0,4.470645268758138
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,48,4,128,1,float16,fp8,0,4.4852854410807295
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,48,2,128,1,float16,fp8,0,1.8895999590555828
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,48,4,128,1,float16,float16,0,1.9734026590983074
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,48,1,128,1,float16,float16,0,0.7996373176574707
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,48,2,128,1,float16,float16,0,1.693504015604655
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,48,4,128,1,float16,fp8,0,2.2706665992736816
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,48,8,128,1,float16,float16,0,1.99727996190389
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,48,1,128,1,float16,fp8,0,0.9079840183258057
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,48,2,128,1,float16,float16,0,0.8674240112304688
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,48,48,128,1,float16,float16,0,1.2985226313273113
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,48,2,128,1,float16,fp8,0,0.9623839855194092
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,48,8,128,1,float16,fp8,0,2.273226737976074
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,48,4,128,1,float16,float16,0,0.9957760175069174
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,48,48,128,1,float16,float16,0,0.659226655960083
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,48,4,128,1,float16,fp8,0,1.1485599676767986
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,48,8,128,1,float16,fp8,0,1.194698651631673
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,48,48,128,1,float16,fp8,0,0.8490346272786459
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,48,48,128,1,float16,fp8,0,1.6825599670410156
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,48,1,128,1,float16,fp8,0,0.46301865577697754
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,48,1,128,1,float16,float16,0,0.40742401281992596
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,48,2,128,1,float16,float16,0,0.444106658299764
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,48,2,128,1,float16,fp8,0,0.4949066638946533
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,48,4,128,1,float16,float16,0,0.5127413272857666
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,48,4,128,1,float16,fp8,0,0.5933493375778198
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,48,8,128,1,float16,fp8,0,0.6021973292032877
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,48,48,128,1,float16,float16,0,0.33921066919962567
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,48,1,128,1,float16,float16,0,0.21579732497533163
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,48,48,128,1,float16,fp8,0,0.4403040011723836
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,48,8,128,1,float16,float16,0,1.0120000044504802
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,48,8,128,1,float16,float16,0,0.5166879892349243
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,48,4,128,1,float16,fp8,0,0.3087306618690491
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,48,1,128,1,float16,fp8,0,0.24566400051116943
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,48,2,128,1,float16,float16,0,0.23382399479548135
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,48,2,128,1,float16,fp8,0,0.2590826749801636
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,48,48,128,1,float16,fp8,0,0.23230934143066406
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,48,4,128,1,float16,float16,0,0.2630293369293213
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,48,8,128,1,float16,fp8,0,0.3232106765111287
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,48,48,128,1,float16,float16,0,0.18273067474365234
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,48,8,128,1,float16,float16,0,0.2725226680437724
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,48,1,128,1,float16,float16,0,0.11730666955312093
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,48,1,128,1,float16,fp8,0,0.1350986659526825
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,48,2,128,1,float16,float16,0,0.12628799676895142
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,48,2,128,1,float16,fp8,0,0.14275733629862467
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,48,4,128,1,float16,float16,0,0.14049599568049112
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,48,4,128,1,float16,fp8,0,0.1662613352139791
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,48,8,128,1,float16,fp8,0,0.17227200667063394
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,48,48,128,1,float16,float16,0,0.10008000334103902
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,48,8,128,1,float16,float16,0,0.14408533771832785
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,48,48,128,1,float16,fp8,0,0.12773332993189493
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,48,1,128,1,float16,float16,0,0.06764266888300578
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,48,1,128,1,float16,fp8,0,0.07633066674073537
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,48,2,128,1,float16,float16,0,0.07150933146476746
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,48,8,128,1,float16,fp8,0,0.09637866417566936
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,48,2,128,1,float16,fp8,0,0.08211733400821686
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,48,4,128,1,float16,float16,0,0.07993599772453308
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,48,8,128,1,float16,float16,0,0.08080533146858215
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,48,4,128,1,float16,fp8,0,0.09305066863695781
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,48,48,128,1,float16,float16,0,0.05875200033187866
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,48,48,128,1,float16,fp8,0,0.07386666536331177
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,48,1,128,1,float16,float16,0,0.036133334040641785
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,48,1,128,1,float16,fp8,0,0.04386133452256521
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,48,2,128,1,float16,float16,0,0.03969600051641464
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,48,2,128,1,float16,fp8,0,0.045935998360315956
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,48,4,128,1,float16,float16,0,0.042208001017570496
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,48,4,128,1,float16,fp8,0,0.05078400174776713
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,48,8,128,1,float16,float16,0,0.043696001172065735
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,48,8,128,1,float16,fp8,0,0.05083199838797251
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,48,48,128,1,float16,float16,0,0.03249066571394602
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,48,48,128,1,float16,fp8,0,0.04283200204372406
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,48,1,128,1,float16,float16,0,0.026586666703224182
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,48,1,128,1,float16,fp8,0,0.031141333281993866
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,48,8,128,1,float16,float16,0,0.028101332485675812
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,48,2,128,1,float16,float16,0,0.02712533374627431
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,48,8,128,1,float16,fp8,0,0.035717333356539406
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,48,2,128,1,float16,fp8,0,0.031018666923046112
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,48,4,128,1,float16,float16,0,0.028586665789286297
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,48,4,128,1,float16,fp8,0,0.034261333445707955
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,48,1,128,1,float16,float16,0,1.2989813486735027
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,48,1,128,1,float16,fp8,0,1.4771307309468586
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,48,2,128,1,float16,float16,0,1.3861546516418457
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,48,2,128,1,float16,fp8,0,1.6185706456502278
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,48,4,128,1,float16,fp8,0,1.9752693176269531
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,48,48,128,1,float16,float16,0,1.1597493489583333
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,48,1,128,1,float16,float16,0,0.6570133368174235
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,48,4,128,1,float16,float16,0,1.6928213437398274
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,48,48,128,1,float16,fp8,0,1.4411253929138184
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,48,8,128,1,float16,fp8,0,2.0457919438680015
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,48,8,128,1,float16,float16,0,1.714037259419759
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,48,1,128,1,float16,fp8,0,0.7476320266723633
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,48,2,128,1,float16,float16,0,0.7332639694213867
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,48,2,128,1,float16,fp8,0,0.8179306983947754
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,48,4,128,1,float16,float16,0,0.856602668762207
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,48,4,128,1,float16,fp8,0,1.0032533009847004
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,48,8,128,1,float16,float16,0,0.8705440362294515
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,48,48,128,1,float16,float16,0,0.5904213190078735
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,48,1,128,1,float16,float16,0,0.339354674021403
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,48,48,128,1,float16,fp8,0,0.7399360338846842
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,48,8,128,1,float16,fp8,0,1.0431573390960693
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,48,1,128,1,float16,fp8,0,0.38785600662231445
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,48,2,128,1,float16,float16,0,0.3738186756769816
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,48,2,128,1,float16,fp8,0,0.421125332514445
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,48,4,128,1,float16,float16,0,0.4407573143641154
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,48,4,128,1,float16,fp8,0,0.5178933143615723
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,48,8,128,1,float16,fp8,0,0.5335520108540853
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,48,48,128,1,float16,float16,0,0.30659733215967816
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,48,48,128,1,float16,fp8,0,0.3782293399175008
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,48,1,128,1,float16,float16,0,0.1801919937133789
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,48,1,128,1,float16,fp8,0,0.20699199040730795
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,48,2,128,1,float16,float16,0,0.19566933314005533
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,48,8,128,1,float16,float16,0,0.4442986647288005
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,48,2,128,1,float16,fp8,0,0.22251200675964355
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,48,4,128,1,float16,fp8,0,0.271999994913737
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,48,4,128,1,float16,float16,0,0.22872533400853476
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,48,8,128,1,float16,fp8,0,0.281333327293396
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,48,48,128,1,float16,float16,0,0.16474666198094687
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,48,48,128,1,float16,fp8,0,0.20175999402999878
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,48,1,128,1,float16,float16,0,0.0992693305015564
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,48,1,128,1,float16,fp8,0,0.11386666695276897
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,48,2,128,1,float16,float16,0,0.10819199681282043
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,48,2,128,1,float16,fp8,0,0.12286933263142903
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,48,8,128,1,float16,float16,0,0.23419199387232462
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,48,4,128,1,float16,float16,0,0.12242133418718974
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,48,8,128,1,float16,float16,0,0.1237440009911855
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,48,4,128,1,float16,fp8,0,0.1469013293584188
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,48,8,128,1,float16,fp8,0,0.15245333313941956
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,48,48,128,1,float16,float16,0,0.09078933795293172
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,48,48,128,1,float16,fp8,0,0.11290666460990906
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,48,1,128,1,float16,float16,0,0.05689600110054016
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,48,1,128,1,float16,fp8,0,0.06558933357397716
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,48,2,128,1,float16,float16,0,0.06106133262316386
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,48,8,128,1,float16,fp8,0,0.08658666412035625
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,48,2,128,1,float16,fp8,0,0.06881066660086314
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,48,4,128,1,float16,float16,0,0.06838933130105336
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,48,4,128,1,float16,fp8,0,0.0812853326400121
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,48,8,128,1,float16,float16,0,0.07019733389218648
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,48,48,128,1,float16,float16,0,0.052527998884518944
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,48,1,128,1,float16,float16,0,0.03165333221356074
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,48,48,128,1,float16,fp8,0,0.06632000207901001
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,48,1,128,1,float16,fp8,0,0.03583466758330663
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,48,2,128,1,float16,float16,0,0.032357332607110344
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,48,2,128,1,float16,fp8,0,0.03864533454179764
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,48,4,128,1,float16,float16,0,0.0352906659245491
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,48,4,128,1,float16,fp8,0,0.04260799785455068
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,48,8,128,1,float16,fp8,0,0.04401599864164988
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,48,8,128,1,float16,float16,0,0.036159999668598175
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,48,48,128,1,float16,float16,0,0.028757333755493164
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,48,48,128,1,float16,fp8,0,0.036720000207424164
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,48,1,128,1,float16,float16,0,0.022240000466505688
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,48,1,128,1,float16,fp8,0,0.02666666607062022
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,48,2,128,1,float16,float16,0,0.02258133391539256
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,48,2,128,1,float16,fp8,0,0.026778665681680042
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,48,4,128,1,float16,float16,0,0.023818666736284893
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,48,48,128,1,float16,fp8,0,0.024549332757790882
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,48,4,128,1,float16,fp8,0,0.028538666665554047
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,48,8,128,1,float16,float16,0,0.023914667467276256
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,48,8,128,1,float16,fp8,0,0.029472000896930695
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,48,48,128,1,float16,float16,0,0.018725333114465077
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,48,4,128,1,float16,float16,0,0.01725333308180173
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,48,1,128,1,float16,float16,0,0.016682667036851246
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,48,1,128,1,float16,fp8,0,0.019999999552965164
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,48,2,128,1,float16,float16,0,0.016757333030303318
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,48,2,128,1,float16,fp8,0,0.019941333681344986
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,48,4,128,1,float16,fp8,0,0.02027200038234393
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,48,8,128,1,float16,float16,0,0.016970666746298473
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,48,8,128,1,float16,fp8,0,0.021327999730904896
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,48,1,128,1,float16,float16,0,0.6131946643193563
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,48,1,128,1,float16,fp8,0,0.6655466556549072
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,48,2,128,1,float16,float16,0,0.6764907042185465
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,48,2,128,1,float16,fp8,0,0.7235306898752848
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,48,4,128,1,float16,float16,0,0.8070933024088541
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,48,4,128,1,float16,fp8,0,0.8995839754740397
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,48,48,128,1,float16,float16,0,0.5578666528066
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,48,8,128,1,float16,float16,0,0.8238986333211263
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,48,1,128,1,float16,float16,0,0.3184266686439514
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,48,48,128,1,float16,fp8,0,0.6302293141682943
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,48,1,128,1,float16,fp8,0,0.35596267382303876
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,48,8,128,1,float16,fp8,0,0.9334987004597982
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,48,2,128,1,float16,float16,0,0.3468480110168457
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,48,2,128,1,float16,fp8,0,0.37803733348846436
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,48,4,128,1,float16,float16,0,0.41625599066416424
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,48,4,128,1,float16,fp8,0,0.47278400262196857
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,48,8,128,1,float16,float16,0,0.41743465264638263
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,48,48,128,1,float16,float16,0,0.2922079960505168
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,48,1,128,1,float16,float16,0,0.17062934239705405
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,48,8,128,1,float16,fp8,0,0.46783467133839923
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,48,48,128,1,float16,fp8,0,0.33504001299540204
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,48,1,128,1,float16,fp8,0,0.19489065806070963
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,48,2,128,1,float16,float16,0,0.18826667467753092
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,48,2,128,1,float16,fp8,0,0.20196799437204996
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,48,4,128,1,float16,fp8,0,0.24940800666809082
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,48,4,128,1,float16,float16,0,0.21657600005467734
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,48,8,128,1,float16,float16,0,0.22181334098180136
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,48,8,128,1,float16,fp8,0,0.2536799907684326
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,48,48,128,1,float16,float16,0,0.15528000394503275
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,48,1,128,1,float16,float16,0,0.09451199571291606
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,48,48,128,1,float16,fp8,0,0.1804693341255188
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,48,1,128,1,float16,fp8,0,0.10656533638636272
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,48,2,128,1,float16,float16,0,0.10234133402506511
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,48,2,128,1,float16,fp8,0,0.11258133252461751
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,48,4,128,1,float16,float16,0,0.11749866604804993
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,48,4,128,1,float16,fp8,0,0.13708266615867615
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,48,8,128,1,float16,float16,0,0.12142399946848552
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,48,48,128,1,float16,float16,0,0.0869706670443217
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,48,48,128,1,float16,fp8,0,0.09849066535631816
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,48,1,128,1,float16,float16,0,0.05318933228651682
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,48,8,128,1,float16,fp8,0,0.13911466797192892
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,48,1,128,1,float16,fp8,0,0.057215998570124306
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,48,4,128,1,float16,float16,0,0.06588266789913177
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,48,2,128,1,float16,float16,0,0.057461331288019814
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,48,2,128,1,float16,fp8,0,0.06161599854628245
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,48,4,128,1,float16,fp8,0,0.07205333312352498
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,48,8,128,1,float16,float16,0,0.06670933465162913
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,48,8,128,1,float16,fp8,0,0.07715733349323273
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,48,48,128,1,float16,float16,0,0.049546668926874794
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,48,48,128,1,float16,fp8,0,0.05551999807357788
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,48,1,128,1,float16,float16,0,0.027114666998386383
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,48,1,128,1,float16,fp8,0,0.031845333675543465
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,48,2,128,1,float16,float16,0,0.02855466554562251
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,48,2,128,1,float16,fp8,0,0.03325333446264267
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,48,4,128,1,float16,float16,0,0.031685332457224526
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,48,4,128,1,float16,fp8,0,0.03640533238649368
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,48,8,128,1,float16,float16,0,0.03232000023126602
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,48,8,128,1,float16,fp8,0,0.03797333439191183
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,48,48,128,1,float16,float16,0,0.02741866558790207
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,48,48,128,1,float16,fp8,0,0.030085332691669464
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,48,1,128,1,float16,float16,0,0.020666666328907013
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,48,1,128,1,float16,fp8,0,0.024149333437283833
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,48,2,128,1,float16,float16,0,0.02067733307679494
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,48,4,128,1,float16,float16,0,0.022272000710169475
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,48,2,128,1,float16,fp8,0,0.024277334411938984
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,48,8,128,1,float16,float16,0,0.022517333428064983
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,48,4,128,1,float16,fp8,0,0.025888000925381977
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,48,8,128,1,float16,fp8,0,0.02605333427588145
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,48,48,128,1,float16,fp8,0,0.020181333025296528
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,48,48,128,1,float16,float16,0,0.01714133347074191
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,48,1,128,1,float16,float16,0,0.015311999867359797
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,48,1,128,1,float16,fp8,0,0.017871999492247898
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,48,2,128,1,float16,float16,0,0.015594666202863058
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,48,8,128,1,float16,fp8,0,0.018186666071414948
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,48,2,128,1,float16,fp8,0,0.01794133335351944
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,48,4,128,1,float16,float16,0,0.015365333606799444
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,48,4,128,1,float16,fp8,0,0.018165333817402523
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,48,8,128,1,float16,float16,0,0.015546667079130808
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,48,48,128,1,float16,float16,0,0.016000000139077503
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,48,48,128,1,float16,fp8,0,0.019519999623298645
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,48,1,128,1,float16,float16,0,0.015285332997639975
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,48,1,128,1,float16,fp8,0,0.017338667064905167
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,48,2,128,1,float16,float16,0,0.015178666760524115
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,48,2,128,1,float16,fp8,0,0.017664000391960144
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,48,4,128,1,float16,float16,0,0.015263999501864115
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,48,4,128,1,float16,fp8,0,0.017968000223239262
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,48,8,128,1,float16,float16,0,0.015344000111023584
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,48,8,128,1,float16,fp8,0,0.01752000053723653
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,48,1,128,1,float16,float16,0,0.3124693234761556
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,48,2,128,1,float16,float16,0,0.3472906748453776
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,48,2,128,1,float16,fp8,0,0.37240533034006756
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,48,1,128,1,float16,fp8,0,0.35684800148010254
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,48,4,128,1,float16,float16,0,0.4071360031763713
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,48,8,128,1,float16,fp8,0,0.47953065236409503
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,48,8,128,1,float16,float16,0,0.4123946825663249
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,48,1,128,1,float16,float16,0,0.1678559978802999
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,48,48,128,1,float16,float16,0,0.3320320049921672
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,48,4,128,1,float16,fp8,0,0.4742026726404826
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,48,48,128,1,float16,fp8,0,0.3346986770629883
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,48,1,128,1,float16,fp8,0,0.19273600975672403
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,48,2,128,1,float16,float16,0,0.1829866568247477
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,48,4,128,1,float16,fp8,0,0.24904000759124756
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,48,2,128,1,float16,fp8,0,0.20567999283472696
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,48,4,128,1,float16,float16,0,0.21490132808685303
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,48,48,128,1,float16,fp8,0,0.17678399880727133
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,48,1,128,1,float16,float16,0,0.09458133578300476
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,48,8,128,1,float16,fp8,0,0.25595200061798096
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,48,1,128,1,float16,fp8,0,0.10686933000882466
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,48,2,128,1,float16,float16,0,0.10222400228182475
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,48,4,128,1,float16,fp8,0,0.13609066605567932
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,48,48,128,1,float16,float16,0,0.17483200629552206
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,48,2,128,1,float16,fp8,0,0.11308266719182332
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,48,4,128,1,float16,float16,0,0.11870400110880534
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,48,8,128,1,float16,float16,0,0.2208426594734192
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,48,8,128,1,float16,float16,0,0.12108799815177917
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,48,8,128,1,float16,fp8,0,0.13712533315022787
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,48,48,128,1,float16,float16,0,0.0953386624654134
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,48,48,128,1,float16,fp8,0,0.096261332432429
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,48,1,128,1,float16,fp8,0,0.05878399809201559
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,48,2,128,1,float16,float16,0,0.057717333237330117
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,48,1,128,1,float16,float16,0,0.05302399893601736
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,48,2,128,1,float16,fp8,0,0.06299200157324474
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,48,8,128,1,float16,float16,0,0.06605866551399231
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,48,4,128,1,float16,float16,0,0.06491733094056447
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,48,4,128,1,float16,fp8,0,0.07373333474000295
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,48,8,128,1,float16,fp8,0,0.07589866717656453
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,48,48,128,1,float16,float16,0,0.054858664671579994
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,48,2,128,1,float16,float16,0,0.02809600035349528
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,48,1,128,1,float16,float16,0,0.02677333354949951
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,48,1,128,1,float16,fp8,0,0.031744000812371574
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,48,48,128,1,float16,fp8,0,0.05296533306439718
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,48,2,128,1,float16,fp8,0,0.033488000432650246
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,48,4,128,1,float16,float16,0,0.030741333961486816
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,48,8,128,1,float16,float16,0,0.031583999594052635
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,48,4,128,1,float16,fp8,0,0.036464000741640724
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,48,8,128,1,float16,fp8,0,0.03698666642109553
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,48,48,128,1,float16,float16,0,0.02701866626739502
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,48,48,128,1,float16,fp8,0,0.026447998980681103
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,48,1,128,1,float16,float16,0,0.020549333343903225
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,48,1,128,1,float16,fp8,0,0.024058667321999867
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,48,2,128,1,float16,float16,0,0.020848001043001812
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,48,2,128,1,float16,fp8,0,0.024165332317352295
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,48,4,128,1,float16,float16,0,0.02205866575241089
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,48,4,128,1,float16,fp8,0,0.025839999318122864
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,48,8,128,1,float16,float16,0,0.02218666672706604
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,48,8,128,1,float16,fp8,0,0.025861332813898723
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,48,48,128,1,float16,float16,0,0.01826133330663045
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,48,48,128,1,float16,fp8,0,0.019354666272799175
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,48,1,128,1,float16,float16,0,0.015210667004187902
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,48,1,128,1,float16,fp8,0,0.01747200017174085
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,48,2,128,1,float16,float16,0,0.015317333241303762
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,48,2,128,1,float16,fp8,0,0.01794133335351944
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,48,8,128,1,float16,fp8,0,0.018197332819302876
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,48,48,128,1,float16,float16,0,0.013946666071812311
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,48,4,128,1,float16,float16,0,0.015647999942302704
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,48,4,128,1,float16,fp8,0,0.018181333939234417
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,48,8,128,1,float16,float16,0,0.01563199982047081
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,48,48,128,1,float16,fp8,0,0.015381333728631338
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,48,1,128,1,float16,float16,0,0.015040000279744467
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,48,1,128,1,float16,fp8,0,0.01746133342385292
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,48,4,128,1,float16,float16,0,0.015322666615247726
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,48,2,128,1,float16,fp8,0,0.017616000026464462
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,48,2,128,1,float16,float16,0,0.015237333873907724
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,48,4,128,1,float16,fp8,0,0.017818666994571686
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,48,8,128,1,float16,float16,0,0.014954666296641031
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,48,1,128,1,float16,fp8,0,0.017130666722853977
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,48,8,128,1,float16,fp8,0,0.018016000588734944
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,48,48,128,1,float16,float16,0,0.013781332721312841
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,48,48,128,1,float16,fp8,0,0.014767999450365702
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,48,1,128,1,float16,float16,0,0.014639999717473984
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,48,8,128,1,float16,float16,0,0.015637333194414776
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,48,2,128,1,float16,float16,0,0.014831999937693277
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,48,2,128,1,float16,fp8,0,0.017210666090250015
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,48,4,128,1,float16,float16,0,0.014970666418472925
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,48,4,128,1,float16,fp8,0,0.017279999951521557
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,48,8,128,1,float16,fp8,0,0.017071999609470367
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,48,1,128,1,float16,float16,0,0.16848532358805338
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,48,1,128,1,float16,fp8,0,0.193066676457723
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,48,2,128,1,float16,float16,0,0.1808746655782064
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,48,2,128,1,float16,fp8,0,0.2006346583366394
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,48,4,128,1,float16,float16,0,0.2142560084660848
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,48,4,128,1,float16,fp8,0,0.251802663008372
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,48,48,128,1,float16,float16,0,0.2502506573994954
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,48,8,128,1,float16,fp8,0,0.2918773293495178
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,48,48,128,1,float16,fp8,0,0.2478826642036438
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,48,1,128,1,float16,float16,0,0.09220799803733826
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,48,1,128,1,float16,fp8,0,0.10738133390744527
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,48,8,128,1,float16,float16,0,0.25017066796620685
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,48,2,128,1,float16,float16,0,0.10133333007494609
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,48,2,128,1,float16,fp8,0,0.11336533228556316
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,48,4,128,1,float16,float16,0,0.11793599526087443
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,48,4,128,1,float16,fp8,0,0.13461866974830627
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,48,48,128,1,float16,fp8,0,0.13108266393343607
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,48,8,128,1,float16,float16,0,0.13612799843152365
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,48,8,128,1,float16,fp8,0,0.15731199582417807
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,48,48,128,1,float16,float16,0,0.13223999738693237
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,48,1,128,1,float16,float16,0,0.05285866558551788
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,48,1,128,1,float16,fp8,0,0.056048000852266945
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,48,2,128,1,float16,float16,0,0.057909334699312844
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,48,2,128,1,float16,fp8,0,0.059578667084376015
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,48,4,128,1,float16,float16,0,0.06588799754778545
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,48,4,128,1,float16,fp8,0,0.07336533566315968
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,48,8,128,1,float16,float16,0,0.07594666878382365
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,48,8,128,1,float16,fp8,0,0.087909330924352
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,48,48,128,1,float16,float16,0,0.07282666862010956
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,48,48,128,1,float16,fp8,0,0.0713973343372345
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,48,1,128,1,float16,float16,0,0.02683199942111969
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,48,1,128,1,float16,fp8,0,0.031290667752424874
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,48,2,128,1,float16,float16,0,0.02808533360560735
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,48,2,128,1,float16,fp8,0,0.03329599897066752
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,48,4,128,1,float16,float16,0,0.031125334401925404
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,48,48,128,1,float16,fp8,0,0.035445332527160645
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,48,4,128,1,float16,fp8,0,0.03641066700220108
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,48,8,128,1,float16,float16,0,0.03628266602754593
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,48,8,128,1,float16,fp8,0,0.04289066791534424
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,48,48,128,1,float16,float16,0,0.037685332198937736
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,48,1,128,1,float16,float16,0,0.020442667106787365
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,48,1,128,1,float16,fp8,0,0.0242399995525678
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,48,2,128,1,float16,float16,0,0.02070933332045873
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,48,2,128,1,float16,fp8,0,0.023962666591008503
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,48,4,128,1,float16,float16,0,0.0220320001244545
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,48,48,128,1,float16,float16,0,0.021717332303524017
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,48,4,128,1,float16,fp8,0,0.02568000058333079
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,48,8,128,1,float16,float16,0,0.02195200075705846
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,48,8,128,1,float16,fp8,0,0.026047999660174053
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,48,48,128,1,float16,fp8,0,0.022954667607943218
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,48,1,128,1,float16,float16,0,0.015274666249752045
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,48,1,128,1,float16,fp8,0,0.018021332720915478
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,48,2,128,1,float16,float16,0,0.015461333096027374
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,48,2,128,1,float16,fp8,0,0.017973333597183228
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,48,4,128,1,float16,float16,0,0.01565333331624667
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,48,4,128,1,float16,fp8,0,0.01782400036851565
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,48,8,128,1,float16,float16,0,0.015504000087579092
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,48,8,128,1,float16,fp8,0,0.017994667092959087
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,48,48,128,1,float16,float16,0,0.017407999684413273
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,48,48,128,1,float16,fp8,0,0.01844800015290578
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,48,1,128,1,float16,float16,0,0.015082667271296183
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,48,1,128,1,float16,fp8,0,0.017445333302021027
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,48,2,128,1,float16,float16,0,0.014858666807413101
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,48,2,128,1,float16,fp8,0,0.017605333278576534
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,48,4,128,1,float16,float16,0,0.015173333386580149
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,48,4,128,1,float16,fp8,0,0.01756799966096878
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,48,8,128,1,float16,float16,0,0.014858666807413101
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,48,8,128,1,float16,fp8,0,0.01735466718673706
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,48,48,128,1,float16,float16,0,0.013733333597580591
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,48,48,128,1,float16,fp8,0,0.014549333602190018
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,48,1,128,1,float16,float16,0,0.014592000593741735
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,48,1,128,1,float16,fp8,0,0.016757333030303318
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,48,2,128,1,float16,float16,0,0.014720000326633453
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,48,2,128,1,float16,fp8,0,0.017194667210181553
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,48,4,128,1,float16,float16,0,0.01461333284775416
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,48,4,128,1,float16,fp8,0,0.017322666943073273
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,48,8,128,1,float16,float16,0,0.014618666221698126
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,48,8,128,1,float16,fp8,0,0.01775466650724411
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,48,48,128,1,float16,float16,0,0.01452800010641416
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,48,48,128,1,float16,fp8,0,0.014592000593741735
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,48,1,128,1,float16,float16,0,0.014314666390419006
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,48,1,128,1,float16,fp8,0,0.015856000284353893
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,48,2,128,1,float16,float16,0,0.014021333307027817
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,48,2,128,1,float16,fp8,0,0.01613866661985715
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,48,4,128,1,float16,float16,0,0.014085333794355392
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,48,8,128,1,float16,float16,0,0.013914667069911957
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,48,4,128,1,float16,fp8,0,0.016506666938463848
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,48,8,128,1,float16,fp8,0,0.01617066686352094
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,48,1,128,1,float16,float16,0,0.09318400422732036
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,48,1,128,1,float16,fp8,0,0.1060746709505717
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,48,2,128,1,float16,float16,0,0.10158933202425639
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,48,2,128,1,float16,fp8,0,0.113237331310908
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,48,4,128,1,float16,float16,0,0.13152533769607544
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,48,4,128,1,float16,fp8,0,0.15632533033688864
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,48,48,128,1,float16,float16,0,0.21117866039276123
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,48,48,128,1,float16,fp8,0,0.20567466815312704
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,48,1,128,1,float16,float16,0,0.053077335158983864
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,48,1,128,1,float16,fp8,0,0.05749333401521047
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,48,2,128,1,float16,float16,0,0.05754133562246958
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,48,8,128,1,float16,fp8,0,0.15865600109100342
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,48,2,128,1,float16,fp8,0,0.059392000238100685
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,48,4,128,1,float16,fp8,0,0.08471999565760295
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,48,8,128,1,float16,float16,0,0.13848533233006796
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,48,4,128,1,float16,float16,0,0.07401599983374278
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,48,8,128,1,float16,float16,0,0.07607466479142506
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,48,8,128,1,float16,fp8,0,0.08778132994969685
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,48,48,128,1,float16,float16,0,0.1111253301302592
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,48,48,128,1,float16,fp8,0,0.10918399691581726
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,48,1,128,1,float16,float16,0,0.026736001173655193
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,48,1,128,1,float16,fp8,0,0.031301334500312805
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,48,2,128,1,float16,float16,0,0.027962667246659596
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,48,2,128,1,float16,fp8,0,0.03310933212439219
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,48,4,128,1,float16,float16,0,0.0359253336985906
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,48,4,128,1,float16,fp8,0,0.04161600023508072
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,48,8,128,1,float16,float16,0,0.03677333394686381
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,48,8,128,1,float16,fp8,0,0.04230933388074239
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,48,48,128,1,float16,float16,0,0.0566293348868688
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,48,48,128,1,float16,fp8,0,0.05318933228651682
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,48,1,128,1,float16,float16,0,0.020373333245515823
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,48,1,128,1,float16,fp8,0,0.02430933217207591
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,48,2,128,1,float16,float16,0,0.020634666085243225
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,48,2,128,1,float16,fp8,0,0.02385599911212921
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,48,4,128,1,float16,float16,0,0.02229333420594533
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,48,4,128,1,float16,fp8,0,0.026000000536441803
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,48,8,128,1,float16,float16,0,0.021770666042963665
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,48,8,128,1,float16,fp8,0,0.025909334421157837
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,48,48,128,1,float16,fp8,0,0.032138665517171226
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,48,48,128,1,float16,float16,0,0.03178666780392329
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,48,1,128,1,float16,float16,0,0.015402667224407196
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,48,1,128,1,float16,fp8,0,0.018021332720915478
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,48,2,128,1,float16,float16,0,0.015301333119471868
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,48,2,128,1,float16,fp8,0,0.01757866640885671
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,48,4,128,1,float16,float16,0,0.015397333850463232
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,48,4,128,1,float16,fp8,0,0.01802666609485944
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,48,1,128,1,float16,float16,0,0.014943999548753103
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,48,8,128,1,float16,float16,0,0.015397333850463232
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,48,8,128,1,float16,fp8,0,0.01794133335351944
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,48,48,128,1,float16,float16,0,0.02094399929046631
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,48,48,128,1,float16,fp8,0,0.02170666555563609
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,48,1,128,1,float16,fp8,0,0.017477333545684814
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,48,2,128,1,float16,float16,0,0.014736000448465347
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,48,2,128,1,float16,fp8,0,0.01782400036851565
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,48,4,128,1,float16,float16,0,0.014991999914248785
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,48,4,128,1,float16,fp8,0,0.017423999806245167
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,48,8,128,1,float16,float16,0,0.01488000030318896
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,48,48,128,1,float16,float16,0,0.016672000288963318
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,48,8,128,1,float16,fp8,0,0.017466666797796886
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,48,48,128,1,float16,fp8,0,0.017866666118303936
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,48,1,128,1,float16,float16,0,0.014501333236694336
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,48,1,128,1,float16,fp8,0,0.01739199956258138
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,48,2,128,1,float16,float16,0,0.014645333091417948
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,48,2,128,1,float16,fp8,0,0.0170666662355264
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,48,4,128,1,float16,float16,0,0.01463466634353002
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,48,4,128,1,float16,fp8,0,0.01714133347074191
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,48,8,128,1,float16,float16,0,0.014325333138306936
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,48,8,128,1,float16,fp8,0,0.01700266698996226
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,48,48,128,1,float16,float16,0,0.013605333864688873
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,48,48,128,1,float16,fp8,0,0.01440000037352244
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,48,1,128,1,float16,float16,0,0.013663999736309052
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,48,1,128,1,float16,fp8,0,0.016058667252461117
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,48,2,128,1,float16,float16,0,0.013957332819700241
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,48,2,128,1,float16,fp8,0,0.015882667154073715
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,48,4,128,1,float16,float16,0,0.014010666559139887
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,48,4,128,1,float16,fp8,0,0.016549333930015564
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,48,8,128,1,float16,float16,0,0.013829333086808523
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,48,8,128,1,float16,fp8,0,0.01602666700879733
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,48,48,128,1,float16,float16,0,0.013151999562978745
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,48,1,128,1,float16,float16,0,0.013786666095256805
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,48,48,128,1,float16,fp8,0,0.01431999976436297
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,48,1,128,1,float16,fp8,0,0.015647999942302704
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,48,2,128,1,float16,float16,0,0.013855999956528345
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,48,2,128,1,float16,fp8,0,0.015978666643301647
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,48,4,128,1,float16,float16,0,0.01402666668097178
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,48,4,128,1,float16,fp8,0,0.01603200038274129
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,48,8,128,1,float16,float16,0,0.013568000247081121
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,48,8,128,1,float16,fp8,0,0.01571200042963028
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,40,1,128,1,float16,fp8,0,34.684949239095054
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,40,2,128,1,float16,fp8,0,34.89822896321615
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,40,4,128,1,float16,fp8,0,35.723958333333336
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,40,1,128,1,float16,float16,0,39.627515157063804
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,40,8,128,1,float16,fp8,0,35.847023010253906
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,40,2,128,1,float16,float16,0,39.50152587890625
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,40,8,128,1,float16,float16,0,40.30413818359375
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,40,4,128,1,float16,float16,0,40.11943562825521
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,40,40,128,1,float16,float16,0,20.612794240315754
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,40,40,128,1,float16,fp8,0,18.68827184041341
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,40,1,128,1,float16,float16,0,20.028533935546875
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,40,2,128,1,float16,fp8,0,17.510672251383465
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,40,1,128,1,float16,fp8,0,17.691893259684246
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,40,2,128,1,float16,float16,0,19.757333119710285
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,40,4,128,1,float16,float16,0,19.97476323445638
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,40,4,128,1,float16,fp8,0,17.879892985026043
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,40,40,128,1,float16,fp8,0,9.411962509155273
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,40,40,128,1,float16,float16,0,10.332506815592447
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,40,1,128,1,float16,float16,0,9.947210947672525
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,40,1,128,1,float16,fp8,0,8.837477366129557
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,40,2,128,1,float16,float16,0,9.851792017618815
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,40,2,128,1,float16,fp8,0,8.8417599995931
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,40,8,128,1,float16,fp8,0,17.852799733479817
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,40,8,128,1,float16,float16,0,21.551856994628906
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,40,40,128,1,float16,float16,0,5.314048131306966
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,40,4,128,1,float16,float16,0,9.945370356241861
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,40,4,128,1,float16,fp8,0,9.405034383138021
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,40,8,128,1,float16,fp8,0,9.035296122233072
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,40,40,128,1,float16,fp8,0,4.765791893005371
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,40,8,128,1,float16,float16,0,10.7489865620931
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,40,1,128,1,float16,float16,0,4.661445299784343
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,40,1,128,1,float16,fp8,0,4.704895973205566
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,40,2,128,1,float16,float16,0,4.861578623453776
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,40,2,128,1,float16,fp8,0,4.759680112202962
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,40,4,128,1,float16,float16,0,4.998096148173015
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,40,4,128,1,float16,fp8,0,4.575205485026042
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,40,8,128,1,float16,float16,0,4.820170720418294
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,40,8,128,1,float16,fp8,0,4.920826594034831
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,40,1,128,1,float16,fp8,0,19.908618927001953
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,40,1,128,1,float16,float16,0,22.930155436197918
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,40,2,128,1,float16,fp8,0,20.063509623209637
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,40,4,128,1,float16,fp8,0,20.564341227213543
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,40,2,128,1,float16,float16,0,22.447003682454426
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,40,8,128,1,float16,fp8,0,20.697668711344402
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,40,8,128,1,float16,float16,0,23.04125213623047
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,40,4,128,1,float16,float16,0,23.554410298665363
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,40,40,128,1,float16,float16,0,11.858960469563803
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,40,1,128,1,float16,float16,0,11.22320556640625
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,40,40,128,1,float16,fp8,0,10.919663747151693
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,40,1,128,1,float16,fp8,0,10.009029388427734
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,40,2,128,1,float16,float16,0,11.557242075602213
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,40,2,128,1,float16,fp8,0,10.121295928955078
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,40,4,128,1,float16,fp8,0,10.35533332824707
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,40,4,128,1,float16,float16,0,11.583807627360025
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,40,40,128,1,float16,float16,0,5.712074915568034
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,40,40,128,1,float16,fp8,0,5.51848030090332
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,40,1,128,1,float16,float16,0,5.389861424763997
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,40,1,128,1,float16,fp8,0,5.066543896993001
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,40,2,128,1,float16,float16,0,5.530965169270833
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,40,2,128,1,float16,fp8,0,5.31658140818278
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,40,8,128,1,float16,fp8,0,10.47709846496582
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,40,8,128,1,float16,float16,0,11.483989715576172
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,40,4,128,1,float16,float16,0,5.411397298177083
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,40,40,128,1,float16,float16,0,2.784229278564453
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,40,4,128,1,float16,fp8,0,5.244074821472168
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,40,8,128,1,float16,float16,0,5.452016194661458
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,40,40,128,1,float16,fp8,0,2.8169333140055337
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,40,8,128,1,float16,fp8,0,5.238880157470703
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,40,1,128,1,float16,float16,0,2.538719971974691
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,40,1,128,1,float16,fp8,0,2.842106819152832
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,40,2,128,1,float16,float16,0,2.6032959620157876
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,40,2,128,1,float16,fp8,0,2.5905332565307617
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,40,4,128,1,float16,float16,0,2.669472058614095
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,40,4,128,1,float16,fp8,0,2.668890635172526
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,40,8,128,1,float16,float16,0,2.6344693501790366
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,40,8,128,1,float16,fp8,0,2.74232546488444
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,40,1,128,1,float16,float16,0,15.637354532877604
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,40,2,128,1,float16,fp8,0,14.119429270426432
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,40,2,128,1,float16,float16,0,15.896527608235678
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,40,1,128,1,float16,fp8,0,14.030661265055338
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,40,4,128,1,float16,fp8,0,14.542181650797525
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,40,4,128,1,float16,float16,0,16.22886912027995
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,40,8,128,1,float16,float16,0,16.229658762613933
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,40,40,128,1,float16,float16,0,8.39472516377767
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,40,1,128,1,float16,float16,0,7.809573491414388
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,40,1,128,1,float16,fp8,0,7.087237040201823
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,40,40,128,1,float16,fp8,0,7.821760177612305
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,40,2,128,1,float16,float16,0,7.745199839274089
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,40,2,128,1,float16,fp8,0,7.306298573811849
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,40,4,128,1,float16,float16,0,7.996538798014323
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,40,40,128,1,float16,float16,0,3.855168024698893
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,40,40,128,1,float16,fp8,0,4.295589447021484
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,40,1,128,1,float16,float16,0,3.597402572631836
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,40,1,128,1,float16,fp8,0,3.6909173329671225
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,40,4,128,1,float16,fp8,0,7.534773508707683
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,40,8,128,1,float16,float16,0,8.01202646891276
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,40,8,128,1,float16,fp8,0,7.350954691569011
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,40,8,128,1,float16,fp8,0,14.581578572591146
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,40,2,128,1,float16,float16,0,3.737936019897461
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,40,2,128,1,float16,fp8,0,3.595930735270182
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,40,4,128,1,float16,float16,0,3.8838399251302085
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,40,40,128,1,float16,float16,0,1.9661119778951008
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,40,4,128,1,float16,fp8,0,3.7588160832722983
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,40,40,128,1,float16,fp8,0,2.028864065806071
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,40,1,128,1,float16,float16,0,1.8430986404418945
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,40,8,128,1,float16,float16,0,3.680405298868815
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,40,8,128,1,float16,fp8,0,3.8549439112345376
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,40,1,128,1,float16,fp8,0,1.8308480580647786
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,40,2,128,1,float16,float16,0,1.8619146347045898
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,40,2,128,1,float16,fp8,0,1.8422293663024902
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,40,4,128,1,float16,float16,0,1.8644053141276042
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,40,4,128,1,float16,fp8,0,1.903333346048991
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,40,8,128,1,float16,float16,0,1.9563093185424805
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,40,8,128,1,float16,fp8,0,2.049056053161621
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,40,1,128,1,float16,fp8,0,18.41769027709961
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,40,1,128,1,float16,float16,0,20.398624420166016
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,40,2,128,1,float16,float16,0,20.915152231852215
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,40,2,128,1,float16,fp8,0,18.413616180419922
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,40,4,128,1,float16,float16,0,20.994778951009113
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,40,4,128,1,float16,fp8,0,19.067338307698567
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,40,8,128,1,float16,fp8,0,19.234842936197918
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,40,8,128,1,float16,float16,0,21.293071746826172
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,40,40,128,1,float16,float16,0,11.074405670166016
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,40,40,128,1,float16,fp8,0,10.484544118245443
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,40,1,128,1,float16,float16,0,10.180890401204428
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,40,1,128,1,float16,fp8,0,9.223397572835287
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,40,2,128,1,float16,float16,0,10.143386840820312
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,40,2,128,1,float16,fp8,0,9.224010467529297
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,40,4,128,1,float16,float16,0,10.453823725382486
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,40,4,128,1,float16,fp8,0,9.678757349650065
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,40,40,128,1,float16,float16,0,5.344117482503255
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,40,1,128,1,float16,float16,0,4.890656153361003
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,40,1,128,1,float16,fp8,0,4.626325289408366
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,40,40,128,1,float16,fp8,0,5.224474589029948
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,40,2,128,1,float16,float16,0,5.011743863423665
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,40,2,128,1,float16,fp8,0,4.691173235575358
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,40,8,128,1,float16,fp8,0,9.69040552775065
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,40,8,128,1,float16,float16,0,10.675931294759115
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,40,40,128,1,float16,float16,0,2.6055893898010254
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,40,40,128,1,float16,fp8,0,2.66484800974528
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,40,4,128,1,float16,float16,0,4.903631846110026
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,40,4,128,1,float16,fp8,0,5.045632044474284
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,40,8,128,1,float16,float16,0,5.248330752054851
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,40,1,128,1,float16,float16,0,2.3365920384724936
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,40,8,128,1,float16,fp8,0,4.847834587097168
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,40,1,128,1,float16,fp8,0,2.881930669148763
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,40,2,128,1,float16,float16,0,2.328810691833496
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,40,2,128,1,float16,fp8,0,2.380341370900472
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,40,4,128,1,float16,float16,0,2.4853493372599282
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,40,4,128,1,float16,fp8,0,2.4846827189127603
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,40,40,128,1,float16,float16,0,1.3207093079884846
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,40,40,128,1,float16,fp8,0,1.3862613042195637
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,40,8,128,1,float16,float16,0,2.641119956970215
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,40,8,128,1,float16,fp8,0,2.692223866780599
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,40,1,128,1,float16,float16,0,1.1924479802449544
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,40,1,128,1,float16,fp8,0,1.2208960056304932
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,40,2,128,1,float16,float16,0,1.2272213300069172
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,40,2,128,1,float16,fp8,0,1.2330079873402913
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,40,4,128,1,float16,float16,0,1.2356853485107422
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,40,4,128,1,float16,fp8,0,1.2774666945139568
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,40,8,128,1,float16,float16,0,1.2545066674550374
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,40,8,128,1,float16,fp8,0,1.2930773099263508
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,40,1,128,1,float16,float16,0,11.80893325805664
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,40,1,128,1,float16,fp8,0,10.570106506347656
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,40,2,128,1,float16,fp8,0,10.780928293863932
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,40,2,128,1,float16,float16,0,11.799087524414062
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,40,4,128,1,float16,fp8,0,11.318890889485678
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,40,4,128,1,float16,float16,0,12.449690500895182
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,40,8,128,1,float16,fp8,0,11.326240539550781
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,40,8,128,1,float16,float16,0,12.634090423583984
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,40,40,128,1,float16,float16,0,6.436597188313802
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,40,1,128,1,float16,float16,0,5.588842391967773
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,40,40,128,1,float16,fp8,0,6.249637603759766
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,40,1,128,1,float16,fp8,0,5.343760172526042
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,40,2,128,1,float16,float16,0,5.572256088256836
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,40,2,128,1,float16,fp8,0,5.417535781860352
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,40,4,128,1,float16,float16,0,5.983466466267903
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,40,40,128,1,float16,float16,0,3.0640745162963867
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,40,1,128,1,float16,float16,0,2.657408078511556
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,40,40,128,1,float16,fp8,0,3.300800005594889
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,40,1,128,1,float16,fp8,0,2.949082692464193
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,40,8,128,1,float16,fp8,0,5.69276237487793
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,40,8,128,1,float16,float16,0,6.132218678792317
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,40,4,128,1,float16,fp8,0,5.896176020304362
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,40,2,128,1,float16,float16,0,2.7064746220906577
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,40,2,128,1,float16,fp8,0,2.965205192565918
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,40,4,128,1,float16,float16,0,2.8126614888509116
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,40,4,128,1,float16,fp8,0,2.9339733123779297
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,40,8,128,1,float16,float16,0,2.811855951944987
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,40,40,128,1,float16,float16,0,1.5286506017049153
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,40,1,128,1,float16,fp8,0,1.4197972615559895
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,40,8,128,1,float16,fp8,0,3.1716105143229165
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,40,40,128,1,float16,fp8,0,1.8598453203837078
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,40,1,128,1,float16,float16,0,1.341983954111735
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,40,2,128,1,float16,float16,0,1.392757256825765
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,40,2,128,1,float16,fp8,0,1.4051574071248372
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,40,4,128,1,float16,float16,0,1.4358666737874348
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,40,8,128,1,float16,float16,0,1.4313599268595378
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,40,4,128,1,float16,fp8,0,1.4780480066935222
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,40,8,128,1,float16,fp8,0,1.4992639223734539
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,40,40,128,1,float16,float16,0,0.8047573566436768
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,40,40,128,1,float16,fp8,0,0.8391520182291666
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,40,1,128,1,float16,float16,0,0.7048799991607666
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,40,1,128,1,float16,fp8,0,0.7252746423085531
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,40,2,128,1,float16,float16,0,0.7206346988677979
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,40,2,128,1,float16,fp8,0,0.7351413567860922
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,40,4,128,1,float16,float16,0,0.7268640200297037
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,40,4,128,1,float16,fp8,0,0.7729706764221191
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,40,8,128,1,float16,float16,0,0.7396319707234701
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,40,8,128,1,float16,fp8,0,0.7664053440093994
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,40,1,128,1,float16,float16,0,11.126052856445312
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,40,1,128,1,float16,fp8,0,10.114714940388998
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,40,2,128,1,float16,float16,0,10.968133290608725
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,40,2,128,1,float16,fp8,0,10.2609494527181
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,40,4,128,1,float16,float16,0,11.440245310465494
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,40,4,128,1,float16,fp8,0,11.00042724609375
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,40,8,128,1,float16,float16,0,11.016197204589844
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,40,8,128,1,float16,fp8,0,10.909024556477865
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,40,1,128,1,float16,float16,0,5.220975875854492
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,40,1,128,1,float16,fp8,0,5.092373212178548
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,40,40,128,1,float16,float16,0,6.216341018676758
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,40,2,128,1,float16,float16,0,5.315989176432292
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,40,40,128,1,float16,fp8,0,6.225535710652669
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,40,2,128,1,float16,fp8,0,5.1594133377075195
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,40,4,128,1,float16,float16,0,5.49018669128418
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,40,4,128,1,float16,fp8,0,5.490336100260417
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,40,40,128,1,float16,float16,0,2.9752639134724936
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,40,1,128,1,float16,float16,0,2.483466625213623
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,40,40,128,1,float16,fp8,0,3.2718931833902993
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,40,1,128,1,float16,fp8,0,2.5293332735697427
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,40,2,128,1,float16,float16,0,2.5813546180725098
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,40,8,128,1,float16,float16,0,5.625754674275716
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,40,2,128,1,float16,fp8,0,2.5885653495788574
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,40,8,128,1,float16,fp8,0,5.5293229420979815
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,40,4,128,1,float16,float16,0,2.6839946111043296
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,40,40,128,1,float16,float16,0,1.5790987014770508
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,40,8,128,1,float16,float16,0,2.6641173362731934
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,40,4,128,1,float16,fp8,0,2.827914555867513
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,40,1,128,1,float16,float16,0,1.2422719796498616
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,40,40,128,1,float16,fp8,0,1.6863306363423665
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,40,8,128,1,float16,fp8,0,2.7747198740641275
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,40,1,128,1,float16,fp8,0,1.2947946389516194
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,40,2,128,1,float16,float16,0,1.3106133143107097
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,40,2,128,1,float16,fp8,0,1.3278133074442546
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,40,4,128,1,float16,float16,0,1.3414719899495442
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,40,4,128,1,float16,fp8,0,1.4312693277994792
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,40,40,128,1,float16,float16,0,0.7752532958984375
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,40,8,128,1,float16,float16,0,1.3503306706746419
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,40,40,128,1,float16,fp8,0,0.8252106507619222
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,40,8,128,1,float16,fp8,0,1.4999200503031414
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,40,1,128,1,float16,float16,0,0.647487998008728
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,40,2,128,1,float16,float16,0,0.6528799931208292
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,40,1,128,1,float16,fp8,0,0.6758933067321777
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,40,2,128,1,float16,fp8,0,0.6904906431833903
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,40,4,128,1,float16,float16,0,0.6955733299255371
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,40,4,128,1,float16,fp8,0,0.731328010559082
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,40,8,128,1,float16,float16,0,0.7002027034759521
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,40,8,128,1,float16,fp8,0,0.7538452943166097
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,40,40,128,1,float16,float16,0,0.40877866744995117
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,40,40,128,1,float16,fp8,0,0.4437013467152913
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,40,1,128,1,float16,float16,0,0.33767998218536377
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,40,1,128,1,float16,fp8,0,0.360703984896342
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,40,2,128,1,float16,float16,0,0.3484799861907959
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,40,2,128,1,float16,fp8,0,0.3664693435033162
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,40,4,128,1,float16,float16,0,0.36534400780995685
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,40,4,128,1,float16,fp8,0,0.3986186583836873
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,40,8,128,1,float16,float16,0,0.3729493220647176
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,40,8,128,1,float16,fp8,0,0.3991786638895671
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,40,1,128,1,float16,float16,0,6.240800221761067
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,40,2,128,1,float16,float16,0,6.365925470987956
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,40,1,128,1,float16,fp8,0,6.070869445800781
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,40,2,128,1,float16,fp8,0,6.124570846557617
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,40,4,128,1,float16,float16,0,6.387407938639323
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,40,4,128,1,float16,fp8,0,6.717978795369466
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,40,8,128,1,float16,float16,0,6.857242584228516
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,40,40,128,1,float16,float16,0,3.6986878712972007
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,40,1,128,1,float16,float16,0,2.936378796895345
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,40,1,128,1,float16,fp8,0,3.2018454869588218
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,40,40,128,1,float16,fp8,0,3.9387518564860025
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,40,2,128,1,float16,float16,0,2.982720057169596
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,40,8,128,1,float16,fp8,0,6.730730692545573
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,40,2,128,1,float16,fp8,0,3.186448097229004
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,40,4,128,1,float16,float16,0,3.151631991068522
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,40,8,128,1,float16,float16,0,3.2565441131591797
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,40,4,128,1,float16,fp8,0,3.433018684387207
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,40,1,128,1,float16,float16,0,1.4594613711039226
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,40,40,128,1,float16,fp8,0,1.9982080459594727
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,40,40,128,1,float16,float16,0,1.8445706367492676
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,40,1,128,1,float16,fp8,0,1.5428853034973145
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,40,8,128,1,float16,fp8,0,3.422687848409017
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,40,2,128,1,float16,float16,0,1.5025332768758137
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,40,2,128,1,float16,fp8,0,1.5891733169555664
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,40,4,128,1,float16,float16,0,1.6340640385945637
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,40,4,128,1,float16,fp8,0,1.7158293724060059
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,40,8,128,1,float16,float16,0,1.6272907257080078
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,40,1,128,1,float16,float16,0,0.7471199830373129
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,40,40,128,1,float16,float16,0,0.9233866532643636
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,40,40,128,1,float16,fp8,0,1.0246240297953289
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,40,8,128,1,float16,fp8,0,1.7172533671061199
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,40,1,128,1,float16,fp8,0,0.7921546300252279
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,40,2,128,1,float16,float16,0,0.7789226373036703
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,40,2,128,1,float16,fp8,0,0.8072266578674316
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,40,4,128,1,float16,float16,0,0.8178293704986572
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,40,4,128,1,float16,fp8,0,0.8922560214996338
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,40,8,128,1,float16,float16,0,0.8252320289611816
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,40,40,128,1,float16,float16,0,0.48363733291625977
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,40,8,128,1,float16,fp8,0,0.8973759810129801
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,40,40,128,1,float16,fp8,0,0.5355093479156494
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,40,1,128,1,float16,float16,0,0.3911999861399333
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,40,1,128,1,float16,fp8,0,0.41697601477305096
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,40,2,128,1,float16,float16,0,0.40298132101694745
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,40,2,128,1,float16,fp8,0,0.42716264724731445
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,40,4,128,1,float16,float16,0,0.42353065808614093
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,40,4,128,1,float16,fp8,0,0.4593120018641154
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,40,8,128,1,float16,float16,0,0.43483734130859375
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,40,8,128,1,float16,fp8,0,0.47088531653086346
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,40,40,128,1,float16,float16,0,0.2645439902941386
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,40,40,128,1,float16,fp8,0,0.2928853432337443
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,40,1,128,1,float16,float16,0,0.21477333704630533
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,40,1,128,1,float16,fp8,0,0.2313493291536967
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,40,2,128,1,float16,float16,0,0.21900266408920288
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,40,2,128,1,float16,fp8,0,0.23392534255981445
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,40,4,128,1,float16,float16,0,0.23228265841801962
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,40,4,128,1,float16,fp8,0,0.2531413237253825
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,40,8,128,1,float16,float16,0,0.23848533630371094
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,40,8,128,1,float16,fp8,0,0.25701866547266644
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,40,1,128,1,float16,float16,0,5.8882293701171875
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,40,1,128,1,float16,fp8,0,6.016037623087565
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,40,2,128,1,float16,float16,0,5.896176020304362
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,40,4,128,1,float16,float16,0,6.604272206624349
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,40,2,128,1,float16,fp8,0,6.141285578409831
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,40,4,128,1,float16,fp8,0,6.921258926391602
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,40,8,128,1,float16,float16,0,6.578970591227214
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,40,8,128,1,float16,fp8,0,6.819237391153972
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,40,40,128,1,float16,float16,0,3.8753172556559243
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,40,1,128,1,float16,float16,0,2.853621482849121
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,40,1,128,1,float16,fp8,0,3.1146666208902993
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,40,2,128,1,float16,float16,0,2.9178454081217446
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,40,40,128,1,float16,fp8,0,4.200122515360515
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,40,2,128,1,float16,fp8,0,3.1049280166625977
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,40,4,128,1,float16,float16,0,3.2656799952189126
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,40,4,128,1,float16,fp8,0,3.4493227005004883
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,40,40,128,1,float16,float16,0,1.9002827008565266
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,40,8,128,1,float16,float16,0,3.284458796183268
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,40,1,128,1,float16,float16,0,1.4084800084431965
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,40,8,128,1,float16,fp8,0,3.576021194458008
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,40,1,128,1,float16,fp8,0,1.5232532819112141
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,40,40,128,1,float16,fp8,0,2.1783413887023926
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,40,4,128,1,float16,float16,0,1.6212533315022786
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,40,2,128,1,float16,float16,0,1.4987680117289226
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,40,2,128,1,float16,fp8,0,1.5593120257059734
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,40,4,128,1,float16,fp8,0,1.768074671427409
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,40,8,128,1,float16,float16,0,1.627471923828125
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,40,40,128,1,float16,fp8,0,1.0846400260925293
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,40,8,128,1,float16,fp8,0,1.7933600743611653
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,40,40,128,1,float16,float16,0,0.9665280183156332
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,40,1,128,1,float16,float16,0,0.7248053550720215
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,40,1,128,1,float16,fp8,0,0.782853364944458
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,40,4,128,1,float16,float16,0,0.8186133702596029
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,40,2,128,1,float16,fp8,0,0.803717295328776
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,40,2,128,1,float16,float16,0,0.7524800300598145
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,40,4,128,1,float16,fp8,0,0.9029119809468588
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,40,40,128,1,float16,fp8,0,0.5588800112406412
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,40,8,128,1,float16,float16,0,0.8255519866943359
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,40,8,128,1,float16,fp8,0,0.903557300567627
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,40,40,128,1,float16,float16,0,0.49604801336924237
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,40,2,128,1,float16,fp8,0,0.4212160110473633
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,40,1,128,1,float16,float16,0,0.3765866756439209
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,40,1,128,1,float16,fp8,0,0.4106186628341675
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,40,2,128,1,float16,float16,0,0.38843198617299396
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,40,4,128,1,float16,float16,0,0.4286239941914876
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,40,8,128,1,float16,fp8,0,0.47761066754659015
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,40,4,128,1,float16,fp8,0,0.46619200706481934
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,40,8,128,1,float16,float16,0,0.4349973201751709
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,40,40,128,1,float16,float16,0,0.2699519991874695
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,40,40,128,1,float16,fp8,0,0.299621323744456
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,40,1,128,1,float16,float16,0,0.20021865765253702
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,40,1,128,1,float16,fp8,0,0.22271466255187988
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,40,2,128,1,float16,float16,0,0.21330666542053223
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,40,2,128,1,float16,fp8,0,0.22582934300104776
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,40,4,128,1,float16,float16,0,0.23080533742904663
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,40,4,128,1,float16,fp8,0,0.25243733326594037
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,40,8,128,1,float16,float16,0,0.2360693415006002
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,40,8,128,1,float16,fp8,0,0.2557813326517741
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,40,40,128,1,float16,float16,0,0.1493226687113444
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,40,40,128,1,float16,fp8,0,0.16717867056528726
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,40,1,128,1,float16,float16,0,0.11097600062688191
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,40,1,128,1,float16,fp8,0,0.12230933705965678
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,40,2,128,1,float16,float16,0,0.11640000343322754
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,40,2,128,1,float16,fp8,0,0.12587199608484903
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,40,4,128,1,float16,float16,0,0.1272693375746409
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,40,4,128,1,float16,fp8,0,0.13621866703033447
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,40,8,128,1,float16,float16,0,0.13245333234469095
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,40,8,128,1,float16,fp8,0,0.14385066429773966
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,40,1,128,1,float16,fp8,0,3.7704906463623047
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,40,2,128,1,float16,float16,0,3.678127924601237
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,40,4,128,1,float16,float16,0,4.099605242411296
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,40,2,128,1,float16,fp8,0,3.8361600240071616
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,40,8,128,1,float16,float16,0,4.061903953552246
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,40,4,128,1,float16,fp8,0,4.390666643778483
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,40,1,128,1,float16,float16,0,3.4827092488606772
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,40,1,128,1,float16,float16,0,1.752463976542155
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,40,40,128,1,float16,float16,0,2.4452160199483237
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,40,1,128,1,float16,fp8,0,1.881765365600586
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,40,2,128,1,float16,float16,0,1.803210735321045
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,40,2,128,1,float16,fp8,0,1.9499626159667969
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,40,8,128,1,float16,fp8,0,4.373306592305501
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,40,4,128,1,float16,float16,0,2.0942773818969727
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,40,4,128,1,float16,fp8,0,2.2367307345072427
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,40,8,128,1,float16,float16,0,2.057999928792318
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,40,1,128,1,float16,float16,0,0.8703467051188151
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,40,40,128,1,float16,float16,0,1.2322239875793457
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,40,40,128,1,float16,fp8,0,1.414511998494466
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,40,8,128,1,float16,fp8,0,2.237274646759033
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,40,1,128,1,float16,fp8,0,0.9861599604288737
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,40,2,128,1,float16,float16,0,0.9303253491719564
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,40,4,128,1,float16,float16,0,1.0193760395050049
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,40,4,128,1,float16,fp8,0,1.1302826404571533
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,40,8,128,1,float16,float16,0,1.0418879985809326
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,40,40,128,1,float16,float16,0,0.6282186508178711
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,40,2,128,1,float16,fp8,0,1.003007968266805
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,40,40,128,1,float16,fp8,0,2.799941380818685
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,40,8,128,1,float16,fp8,0,1.1501279671986897
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,40,1,128,1,float16,float16,0,0.45079465707143146
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,40,40,128,1,float16,fp8,0,0.7271146774291992
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,40,1,128,1,float16,fp8,0,0.49351998170216876
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,40,2,128,1,float16,float16,0,0.4777546723683675
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,40,2,128,1,float16,fp8,0,0.5172319809595743
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,40,4,128,1,float16,float16,0,0.5308586756388346
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,40,8,128,1,float16,float16,0,0.5313706795374552
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,40,4,128,1,float16,fp8,0,0.5830719868342081
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,40,8,128,1,float16,fp8,0,0.5980480114618937
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,40,40,128,1,float16,float16,0,0.3311626712481181
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,40,40,128,1,float16,fp8,0,0.3786666790644328
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,40,1,128,1,float16,float16,0,0.24318933486938477
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,40,1,128,1,float16,fp8,0,0.2667253414789836
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,40,2,128,1,float16,float16,0,0.25540266434351605
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,40,2,128,1,float16,fp8,0,0.2701866626739502
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,40,4,128,1,float16,float16,0,0.2778720060984294
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,40,4,128,1,float16,fp8,0,0.30718400080998737
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,40,8,128,1,float16,fp8,0,0.31698666016260785
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,40,8,128,1,float16,float16,0,0.2862880031267802
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,40,40,128,1,float16,float16,0,0.180293341477712
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,40,40,128,1,float16,fp8,0,0.2090346614519755
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,40,1,128,1,float16,float16,0,0.13436266779899597
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,40,1,128,1,float16,fp8,0,0.14495999614397684
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,40,2,128,1,float16,float16,0,0.13876266280810037
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,40,2,128,1,float16,fp8,0,0.1509226659933726
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,40,4,128,1,float16,float16,0,0.15228799978892008
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,40,4,128,1,float16,fp8,0,0.17189866304397583
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,40,8,128,1,float16,float16,0,0.1560426652431488
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,40,8,128,1,float16,fp8,0,0.17521067460378012
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,40,2,128,1,float16,float16,0,0.07626666625340779
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,40,40,128,1,float16,float16,0,0.1021066705385844
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,40,40,128,1,float16,fp8,0,0.11788266897201538
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,40,1,128,1,float16,float16,0,0.07272000114123027
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,40,1,128,1,float16,fp8,0,0.08010133107503255
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,40,2,128,1,float16,fp8,0,0.08447999755541484
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,40,4,128,1,float16,float16,0,0.08210133512814839
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,40,4,128,1,float16,fp8,0,0.09168000022570293
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,40,8,128,1,float16,float16,0,0.08682133754094441
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,40,8,128,1,float16,fp8,0,0.09486400087674458
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,40,1,128,1,float16,float16,0,3.552000045776367
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,40,1,128,1,float16,fp8,0,3.9883734385172525
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,40,2,128,1,float16,float16,0,3.77239990234375
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,40,2,128,1,float16,fp8,0,4.117658615112305
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,40,8,128,1,float16,float16,0,4.434960047403972
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,40,4,128,1,float16,fp8,0,4.871130625406901
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,40,4,128,1,float16,float16,0,4.338277180989583
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,40,8,128,1,float16,fp8,0,4.961914698282878
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,40,40,128,1,float16,float16,0,2.7445812225341797
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,40,1,128,1,float16,float16,0,1.7950560251871746
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,40,1,128,1,float16,fp8,0,2.034671942392985
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,40,40,128,1,float16,fp8,0,3.303231875101725
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,40,2,128,1,float16,float16,0,1.922986666361491
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,40,4,128,1,float16,float16,0,2.192848046620687
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,40,2,128,1,float16,fp8,0,2.0993119875590005
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,40,8,128,1,float16,float16,0,2.22927459081014
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,40,40,128,1,float16,float16,0,1.3660000165303547
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,40,4,128,1,float16,fp8,0,2.468186696370443
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,40,8,128,1,float16,fp8,0,2.542213280995687
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,40,1,128,1,float16,float16,0,0.9119573434193929
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,40,1,128,1,float16,fp8,0,1.0230826536814372
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,40,2,128,1,float16,float16,0,0.9786293506622314
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,40,40,128,1,float16,fp8,0,1.6433812777201335
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,40,2,128,1,float16,fp8,0,1.062986691792806
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,40,4,128,1,float16,fp8,0,1.2597013314565022
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,40,4,128,1,float16,float16,0,1.1149386564890544
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,40,40,128,1,float16,float16,0,0.7000906467437744
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,40,8,128,1,float16,fp8,0,1.2879679997762044
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,40,40,128,1,float16,fp8,0,0.8359519640604655
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,40,1,128,1,float16,float16,0,0.4702293475468953
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,40,1,128,1,float16,fp8,0,0.5264053344726562
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,40,2,128,1,float16,float16,0,0.5060426791508993
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,40,2,128,1,float16,fp8,0,0.545146663983663
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,40,4,128,1,float16,float16,0,0.5777386824289957
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,40,8,128,1,float16,float16,0,1.1159466902414958
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,40,8,128,1,float16,float16,0,0.5740213394165039
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,40,4,128,1,float16,fp8,0,0.6389546791712443
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,40,8,128,1,float16,fp8,0,0.6516106526056925
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,40,40,128,1,float16,float16,0,0.3635466496149699
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,40,40,128,1,float16,fp8,0,0.43776532014211017
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,40,1,128,1,float16,float16,0,0.2463093400001526
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,40,1,128,1,float16,fp8,0,0.2781599958737691
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,40,2,128,1,float16,float16,0,0.2659306724866231
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,40,2,128,1,float16,fp8,0,0.2873813311258952
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,40,4,128,1,float16,float16,0,0.298634668191274
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,40,4,128,1,float16,fp8,0,0.33560001850128174
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,40,8,128,1,float16,fp8,0,0.34962666034698486
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,40,8,128,1,float16,float16,0,0.30473599831263226
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,40,2,128,1,float16,float16,0,0.14484799901644388
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,40,40,128,1,float16,float16,0,0.19533334175745645
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,40,40,128,1,float16,fp8,0,0.2341653307278951
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,40,1,128,1,float16,float16,0,0.1344586710135142
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,40,1,128,1,float16,fp8,0,0.1502400040626526
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,40,2,128,1,float16,fp8,0,0.15820266803105673
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,40,4,128,1,float16,float16,0,0.16205333669980368
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,40,8,128,1,float16,float16,0,0.16380799810091654
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,40,4,128,1,float16,fp8,0,0.18225600322087607
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,40,8,128,1,float16,fp8,0,0.18794665733973184
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,40,2,128,1,float16,float16,0,0.08010666569073994
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,40,2,128,1,float16,fp8,0,0.08761599659919739
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,40,40,128,1,float16,float16,0,0.10827733079592387
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,40,40,128,1,float16,fp8,0,0.13246933619181314
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,40,1,128,1,float16,float16,0,0.07558399935563405
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,40,1,128,1,float16,fp8,0,0.08398933211962382
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,40,4,128,1,float16,float16,0,0.09034132957458496
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,40,4,128,1,float16,fp8,0,0.09907199939092
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,40,8,128,1,float16,float16,0,0.09141332904497783
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,40,8,128,1,float16,fp8,0,0.10517332951227824
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,40,40,128,1,float16,float16,0,0.06485866506894429
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,40,40,128,1,float16,fp8,0,0.0766133318344752
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,40,1,128,1,float16,float16,0,0.04539200166861216
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,40,1,128,1,float16,fp8,0,0.051813334226608276
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,40,2,128,1,float16,float16,0,0.04739200075467428
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,40,2,128,1,float16,fp8,0,0.05474133292833964
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,40,4,128,1,float16,float16,0,0.05032533407211304
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,40,4,128,1,float16,fp8,0,0.05663999915122986
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,40,8,128,1,float16,float16,0,0.05072533090909322
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,40,8,128,1,float16,fp8,0,0.059952000776926674
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,40,1,128,1,float16,float16,0,2.618607997894287
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,40,2,128,1,float16,float16,0,2.8061278661092124
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,40,1,128,1,float16,fp8,0,3.056447982788086
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,40,2,128,1,float16,fp8,0,3.1701653798421225
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,40,40,128,1,float16,float16,0,2.2208639780680337
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,40,4,128,1,float16,float16,0,3.408229192097982
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,40,4,128,1,float16,fp8,0,3.944618542989095
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,40,1,128,1,float16,float16,0,1.3374773661295574
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,40,1,128,1,float16,fp8,0,1.5259466171264648
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,40,8,128,1,float16,fp8,0,4.1009171803792315
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,40,40,128,1,float16,fp8,0,2.906911849975586
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,40,2,128,1,float16,float16,0,1.4233066240946453
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,40,8,128,1,float16,float16,0,3.4490880966186523
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,40,40,128,1,float16,float16,0,1.126960039138794
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,40,4,128,1,float16,fp8,0,2.0053812662760415
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,40,4,128,1,float16,float16,0,1.72106138865153
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,40,2,128,1,float16,fp8,0,1.6034453709920247
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,40,8,128,1,float16,fp8,0,2.082373301188151
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,40,8,128,1,float16,float16,0,1.7368213335673015
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,40,40,128,1,float16,fp8,0,1.4679360389709473
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,40,1,128,1,float16,fp8,0,0.779317299524943
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,40,4,128,1,float16,float16,0,0.8673439820607504
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,40,2,128,1,float16,float16,0,0.7345333099365234
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,40,1,128,1,float16,float16,0,0.6752266883850098
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,40,2,128,1,float16,fp8,0,0.8136693636576334
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,40,40,128,1,float16,float16,0,0.5797546704610189
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,40,8,128,1,float16,float16,0,0.8862079779307047
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,40,4,128,1,float16,fp8,0,1.018346627553304
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,40,1,128,1,float16,float16,0,0.3497333526611328
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,40,40,128,1,float16,fp8,0,0.7435146967569987
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,40,1,128,1,float16,fp8,0,0.40108799934387207
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,40,8,128,1,float16,fp8,0,1.0366346836090088
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,40,2,128,1,float16,float16,0,0.38497066497802734
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,40,4,128,1,float16,float16,0,0.4465013345082601
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,40,4,128,1,float16,fp8,0,0.5266453425089518
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,40,8,128,1,float16,float16,0,0.44873066743214923
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,40,40,128,1,float16,fp8,0,0.386735995610555
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,40,1,128,1,float16,float16,0,0.18501333395640054
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,40,2,128,1,float16,fp8,0,0.4210826555887858
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,40,8,128,1,float16,fp8,0,0.5392053524653116
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,40,1,128,1,float16,fp8,0,0.2126026749610901
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,40,40,128,1,float16,float16,0,0.29817066589991253
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,40,2,128,1,float16,float16,0,0.2004959980646769
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,40,2,128,1,float16,fp8,0,0.2239039937655131
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,40,4,128,1,float16,fp8,0,0.2746079961458842
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,40,4,128,1,float16,float16,0,0.23430399099985758
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,40,8,128,1,float16,float16,0,0.2399466633796692
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,40,8,128,1,float16,fp8,0,0.28827200333277386
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,40,40,128,1,float16,float16,0,0.16032532850901285
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,40,2,128,1,float16,float16,0,0.1104159951210022
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,40,40,128,1,float16,fp8,0,0.20494399468104044
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,40,1,128,1,float16,float16,0,0.10247466961542766
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,40,1,128,1,float16,fp8,0,0.11658666531244914
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,40,2,128,1,float16,fp8,0,0.12410133083661397
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,40,4,128,1,float16,float16,0,0.12603200475374857
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,40,4,128,1,float16,fp8,0,0.14972266554832458
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,40,8,128,1,float16,float16,0,0.1284266710281372
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,40,8,128,1,float16,fp8,0,0.1553973356882731
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,40,40,128,1,float16,float16,0,0.0895146628220876
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,40,40,128,1,float16,fp8,0,0.11402666568756104
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,40,1,128,1,float16,float16,0,0.05745066702365875
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,40,1,128,1,float16,fp8,0,0.06477333108584087
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,40,2,128,1,float16,float16,0,0.06321066617965698
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,40,2,128,1,float16,fp8,0,0.06991466879844666
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,40,4,128,1,float16,float16,0,0.07109866539637248
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,40,8,128,1,float16,fp8,0,0.08730133374532063
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,40,8,128,1,float16,float16,0,0.0718560020128886
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,40,4,128,1,float16,fp8,0,0.08117333551247914
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,40,40,128,1,float16,float16,0,0.053301334381103516
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,40,1,128,1,float16,float16,0,0.03573866685231527
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,40,1,128,1,float16,fp8,0,0.04186133543650309
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,40,2,128,1,float16,float16,0,0.036544000109036766
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,40,40,128,1,float16,fp8,0,0.06737599770228068
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,40,2,128,1,float16,fp8,0,0.04281599819660187
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,40,4,128,1,float16,float16,0,0.039077334105968475
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,40,4,128,1,float16,fp8,0,0.04711466530958811
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,40,8,128,1,float16,float16,0,0.040021332601706185
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,40,8,128,1,float16,fp8,0,0.04780266682306925
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,40,40,128,1,float16,float16,0,0.029535998900731403
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,40,40,128,1,float16,fp8,0,0.037418665985266365
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,40,1,128,1,float16,float16,0,0.024901332954565685
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,40,1,128,1,float16,fp8,0,0.030421334008375805
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,40,8,128,1,float16,float16,0,0.026762666801611584
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,40,2,128,1,float16,float16,0,0.025066666305065155
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,40,2,128,1,float16,fp8,0,0.030239999294281006
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,40,4,128,1,float16,float16,0,0.02665599932273229
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,40,4,128,1,float16,fp8,0,0.032933334509531655
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,40,8,128,1,float16,fp8,0,0.033258666594823204
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,40,1,128,1,float16,float16,0,1.0968106587727864
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,40,1,128,1,float16,fp8,0,1.29202135403951
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,40,2,128,1,float16,float16,0,1.2019200325012207
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,40,2,128,1,float16,fp8,0,1.3594932556152344
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,40,4,128,1,float16,float16,0,1.4968479474385579
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,40,40,128,1,float16,float16,0,1.0116639931996663
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,40,4,128,1,float16,fp8,0,1.7673865954081218
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,40,1,128,1,float16,float16,0,0.5602133274078369
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,40,8,128,1,float16,float16,0,1.5068906148274739
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,40,40,128,1,float16,fp8,0,1.2676640351613362
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,40,1,128,1,float16,fp8,0,0.657034675280253
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,40,8,128,1,float16,fp8,0,1.7992639541625977
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,40,2,128,1,float16,float16,0,0.626261313756307
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,40,2,128,1,float16,fp8,0,0.696330706278483
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,40,4,128,1,float16,float16,0,0.7550613085428873
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,40,4,128,1,float16,fp8,0,0.8993120193481445
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,40,40,128,1,float16,float16,0,0.5168053309122721
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,40,8,128,1,float16,float16,0,0.7534879843393961
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,40,8,128,1,float16,fp8,0,0.9038240114847819
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,40,1,128,1,float16,float16,0,0.291375994682312
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,40,1,128,1,float16,fp8,0,0.344650665918986
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,40,2,128,1,float16,float16,0,0.321615993976593
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,40,2,128,1,float16,fp8,0,0.36024534702301025
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,40,4,128,1,float16,float16,0,0.38891200224558514
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,40,4,128,1,float16,fp8,0,0.4660960038503011
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,40,8,128,1,float16,float16,0,0.390341321627299
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,40,40,128,1,float16,float16,0,0.27109867334365845
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,40,40,128,1,float16,fp8,0,0.3354719877243042
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,40,8,128,1,float16,fp8,0,0.47333868344624835
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,40,1,128,1,float16,float16,0,0.15294933319091797
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,40,40,128,1,float16,fp8,0,0.6446400086085001
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,40,1,128,1,float16,fp8,0,0.18219733238220215
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,40,2,128,1,float16,float16,0,0.17185600598653158
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,40,2,128,1,float16,fp8,0,0.1909546653429667
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,40,4,128,1,float16,fp8,0,0.2444480061531067
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,40,4,128,1,float16,float16,0,0.2050666610399882
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,40,8,128,1,float16,float16,0,0.20835200945536295
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,40,40,128,1,float16,float16,0,0.14378666877746582
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,40,8,128,1,float16,fp8,0,0.2534826596577962
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,40,40,128,1,float16,fp8,0,0.1808746655782064
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,40,1,128,1,float16,float16,0,0.08598400155703227
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,40,1,128,1,float16,fp8,0,0.10160533587137859
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,40,2,128,1,float16,float16,0,0.0941546658674876
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,40,2,128,1,float16,fp8,0,0.1069599986076355
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,40,4,128,1,float16,float16,0,0.10923199852307637
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,40,4,128,1,float16,fp8,0,0.13320533434549967
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,40,8,128,1,float16,float16,0,0.11106666922569275
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,40,8,128,1,float16,fp8,0,0.1365653375784556
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,40,40,128,1,float16,float16,0,0.08123200138409932
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,40,40,128,1,float16,fp8,0,0.10084799925486247
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,40,1,128,1,float16,float16,0,0.048207998275756836
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,40,1,128,1,float16,fp8,0,0.05489066739877065
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,40,2,128,1,float16,float16,0,0.05286933481693268
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,40,2,128,1,float16,fp8,0,0.060133333007494606
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,40,4,128,1,float16,float16,0,0.06132266422112783
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,40,8,128,1,float16,float16,0,0.06337066491444905
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,40,4,128,1,float16,fp8,0,0.07151466608047485
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,40,8,128,1,float16,fp8,0,0.07683733105659485
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,40,40,128,1,float16,float16,0,0.048112000028292336
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,40,40,128,1,float16,fp8,0,0.06144533554712931
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,40,1,128,1,float16,float16,0,0.028922667105992634
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,40,1,128,1,float16,fp8,0,0.03522666543722153
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,40,2,128,1,float16,float16,0,0.03070933371782303
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,40,2,128,1,float16,fp8,0,0.03570133447647095
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,40,4,128,1,float16,float16,0,0.03374933451414108
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,40,4,128,1,float16,fp8,0,0.04170133173465729
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,40,8,128,1,float16,float16,0,0.03359466542800268
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,40,40,128,1,float16,float16,0,0.026213333010673523
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,40,40,128,1,float16,fp8,0,0.03453333427508672
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,40,8,128,1,float16,fp8,0,0.04174399872620901
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,40,1,128,1,float16,float16,0,0.022005334496498108
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,40,1,128,1,float16,fp8,0,0.02647999922434489
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,40,2,128,1,float16,float16,0,0.022384000321229298
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,40,2,128,1,float16,fp8,0,0.02644266684850057
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,40,4,128,1,float16,float16,0,0.023584000766277313
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,40,4,128,1,float16,fp8,0,0.029114666084448498
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,40,8,128,1,float16,float16,0,0.02369600037733714
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,40,8,128,1,float16,fp8,0,0.029301332930723827
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,40,40,128,1,float16,float16,0,0.018565333137909572
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,40,40,128,1,float16,fp8,0,0.023530667026837666
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,40,1,128,1,float16,float16,0,0.016549333930015564
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,40,1,128,1,float16,fp8,0,0.019871999820073444
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,40,2,128,1,float16,float16,0,0.016730666160583496
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,40,2,128,1,float16,fp8,0,0.019402666638294857
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,40,4,128,1,float16,float16,0,0.016858667135238647
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,40,4,128,1,float16,fp8,0,0.020576000213623047
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,40,8,128,1,float16,float16,0,0.0169813334941864
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,40,8,128,1,float16,fp8,0,0.021040000021457672
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,40,1,128,1,float16,float16,0,0.5231306552886963
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,40,1,128,1,float16,fp8,0,0.5737386544545492
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,40,2,128,1,float16,float16,0,0.5806666612625122
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,40,4,128,1,float16,float16,0,0.7053759892781576
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,40,2,128,1,float16,fp8,0,0.6214346488316854
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,40,4,128,1,float16,fp8,0,0.7963519891103109
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,40,40,128,1,float16,float16,0,0.4904640118281047
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,40,40,128,1,float16,fp8,0,0.5748000144958496
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,40,8,128,1,float16,float16,0,0.7316640218098959
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,40,8,128,1,float16,fp8,0,0.8095733324686686
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,40,1,128,1,float16,float16,0,0.26992533604304
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,40,1,128,1,float16,fp8,0,0.30740267038345337
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,40,2,128,1,float16,float16,0,0.300927996635437
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,40,2,128,1,float16,fp8,0,0.3251413305600484
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,40,4,128,1,float16,float16,0,0.3683520158131917
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,40,4,128,1,float16,fp8,0,0.4267786741256714
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,40,8,128,1,float16,float16,0,0.36999468008677167
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,40,40,128,1,float16,float16,0,0.25438400109608966
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,40,8,128,1,float16,fp8,0,0.42827733357747394
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,40,2,128,1,float16,float16,0,0.1622933348019918
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,40,40,128,1,float16,fp8,0,0.2993013262748718
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,40,1,128,1,float16,float16,0,0.14510400096575418
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,40,1,128,1,float16,fp8,0,0.16644799709320068
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,40,2,128,1,float16,fp8,0,0.17864000797271729
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,40,4,128,1,float16,float16,0,0.19369600216547647
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,40,4,128,1,float16,fp8,0,0.22267200549443564
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,40,8,128,1,float16,float16,0,0.19782400131225586
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,40,40,128,1,float16,float16,0,0.1383680005868276
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,40,40,128,1,float16,fp8,0,0.15981333454449972
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,40,1,128,1,float16,float16,0,0.08293333152929942
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,40,8,128,1,float16,fp8,0,0.23252266645431519
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,40,1,128,1,float16,fp8,0,0.09338133533795674
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,40,2,128,1,float16,float16,0,0.09071466326713562
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,40,2,128,1,float16,fp8,0,0.09929066896438599
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,40,4,128,1,float16,float16,0,0.10671466588973999
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,40,4,128,1,float16,fp8,0,0.12205866972605388
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,40,8,128,1,float16,float16,0,0.10739200313886006
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,40,8,128,1,float16,fp8,0,0.1241439978281657
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,40,40,128,1,float16,float16,0,0.07815999786059062
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,40,40,128,1,float16,fp8,0,0.08874666690826416
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,40,1,128,1,float16,float16,0,0.044026667873064675
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,40,1,128,1,float16,fp8,0,0.04906666775544485
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,40,2,128,1,float16,float16,0,0.04952000081539154
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,40,2,128,1,float16,fp8,0,0.05036800106366476
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,40,4,128,1,float16,float16,0,0.05738133192062378
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,40,4,128,1,float16,fp8,0,0.05977599819501241
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,40,8,128,1,float16,float16,0,0.06141866743564606
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,40,8,128,1,float16,fp8,0,0.06797333558400472
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,40,40,128,1,float16,float16,0,0.04645333190759023
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,40,40,128,1,float16,fp8,0,0.05177066723505656
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,40,1,128,1,float16,float16,0,0.026506667335828144
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,40,1,128,1,float16,fp8,0,0.03089066594839096
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,40,2,128,1,float16,float16,0,0.0276053324341774
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,40,2,128,1,float16,fp8,0,0.032298666735490165
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,40,4,128,1,float16,float16,0,0.030640001098314922
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,40,4,128,1,float16,fp8,0,0.03562666724125544
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,40,8,128,1,float16,float16,0,0.030773334205150604
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,40,8,128,1,float16,fp8,0,0.03572266548871994
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,40,40,128,1,float16,float16,0,0.024853333830833435
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,40,40,128,1,float16,fp8,0,0.028751999139785767
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,40,1,128,1,float16,float16,0,0.020474666108687718
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,40,1,128,1,float16,fp8,0,0.02383466561635335
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,40,2,128,1,float16,fp8,0,0.024192000428835552
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,40,2,128,1,float16,float16,0,0.02062400057911873
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,40,4,128,1,float16,float16,0,0.021984001000722248
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,40,4,128,1,float16,fp8,0,0.025775998830795288
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,40,8,128,1,float16,float16,0,0.021850667893886566
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,40,8,128,1,float16,fp8,0,0.025983999172846477
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,40,40,128,1,float16,float16,0,0.016965333372354507
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,40,40,128,1,float16,fp8,0,0.019845332950353622
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,40,1,128,1,float16,float16,0,0.015306666493415833
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,40,1,128,1,float16,fp8,0,0.01786133274435997
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,40,2,128,1,float16,float16,0,0.015562667200962702
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,40,2,128,1,float16,fp8,0,0.017946666727463405
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,40,4,128,1,float16,float16,0,0.01562133307258288
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,40,4,128,1,float16,fp8,0,0.018058666338523228
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,40,8,128,1,float16,float16,0,0.01573333392540614
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,40,8,128,1,float16,fp8,0,0.017759999881188076
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,40,40,128,1,float16,float16,0,0.016010666886965435
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,40,40,128,1,float16,fp8,0,0.018751999984184902
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,40,1,128,1,float16,float16,0,0.015040000279744467
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,40,1,128,1,float16,fp8,0,0.017231999586025875
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,40,2,128,1,float16,float16,0,0.014773332824309668
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,40,2,128,1,float16,fp8,0,0.017312000195185345
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,40,4,128,1,float16,float16,0,0.014938666174809137
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,40,4,128,1,float16,fp8,0,0.017530667285124462
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,40,8,128,1,float16,float16,0,0.015082667271296183
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,40,8,128,1,float16,fp8,0,0.017829333742459614
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,40,1,128,1,float16,float16,0,0.26742400725682575
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,40,1,128,1,float16,fp8,0,0.30507200956344604
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,40,2,128,1,float16,float16,0,0.2966826756795247
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,40,2,128,1,float16,fp8,0,0.3253386616706848
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,40,4,128,1,float16,float16,0,0.3640693426132202
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,40,40,128,1,float16,float16,0,0.28970666726430255
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,40,4,128,1,float16,fp8,0,0.42293866475423175
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,40,8,128,1,float16,fp8,0,0.4622666835784912
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,40,8,128,1,float16,float16,0,0.39813868204752606
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,40,1,128,1,float16,float16,0,0.1439413328965505
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,40,1,128,1,float16,fp8,0,0.16680532693862915
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,40,2,128,1,float16,float16,0,0.16193600495656332
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,40,4,128,1,float16,float16,0,0.1922826568285624
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,40,2,128,1,float16,fp8,0,0.17796266078948975
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,40,8,128,1,float16,fp8,0,0.24547199408213297
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,40,8,128,1,float16,float16,0,0.21225066979726157
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,40,40,128,1,float16,fp8,0,0.15736533204714456
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,40,40,128,1,float16,float16,0,0.15321066975593567
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,40,4,128,1,float16,fp8,0,0.2234506607055664
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,40,1,128,1,float16,float16,0,0.08267199993133545
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,40,2,128,1,float16,float16,0,0.0902346670627594
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,40,1,128,1,float16,fp8,0,0.09286399682362874
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,40,2,128,1,float16,fp8,0,0.0990773340066274
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,40,4,128,1,float16,float16,0,0.10681600371996562
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,40,8,128,1,float16,float16,0,0.11539733409881592
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,40,4,128,1,float16,fp8,0,0.12196266651153564
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,40,8,128,1,float16,fp8,0,0.13408533732096353
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,40,40,128,1,float16,float16,0,0.08476799726486206
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,40,40,128,1,float16,fp8,0,0.29791466395060223
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,40,1,128,1,float16,float16,0,0.042863999803860985
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,40,1,128,1,float16,fp8,0,0.046666666865348816
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,40,40,128,1,float16,fp8,0,0.08667733271916707
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,40,2,128,1,float16,float16,0,0.049253334601720176
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,40,2,128,1,float16,fp8,0,0.04930133124192556
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,40,4,128,1,float16,float16,0,0.057445332407951355
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,40,4,128,1,float16,fp8,0,0.06368533273537953
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,40,8,128,1,float16,fp8,0,0.07206400235493977
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,40,40,128,1,float16,float16,0,0.04839999973773956
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,40,1,128,1,float16,float16,0,0.026416001220544178
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,40,1,128,1,float16,fp8,0,0.030762667457262676
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,40,40,128,1,float16,fp8,0,0.04610666632652283
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,40,2,128,1,float16,float16,0,0.027632000545660656
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,40,2,128,1,float16,fp8,0,0.032501332461833954
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,40,4,128,1,float16,fp8,0,0.0355679988861084
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,40,4,128,1,float16,float16,0,0.030133334298928578
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,40,8,128,1,float16,float16,0,0.03068266560633977
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,40,1,128,1,float16,fp8,0,0.023904000719388325
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,40,1,128,1,float16,float16,0,0.020410666863123577
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,40,40,128,1,float16,fp8,0,0.025392000873883564
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,40,2,128,1,float16,float16,0,0.020288000504175823
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,40,8,128,1,float16,fp8,0,0.03615466753641764
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,40,40,128,1,float16,float16,0,0.02479466547568639
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,40,2,128,1,float16,fp8,0,0.02402666707833608
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,40,8,128,1,float16,float16,0,0.06446399788061778
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,40,4,128,1,float16,float16,0,0.021882665654023487
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,40,4,128,1,float16,fp8,0,0.025637333591779072
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,40,8,128,1,float16,float16,0,0.02197333425283432
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,40,8,128,1,float16,fp8,0,0.025349333882331848
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,40,40,128,1,float16,float16,0,0.018079999834299088
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,40,1,128,1,float16,float16,0,0.015184000134468079
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,40,40,128,1,float16,fp8,0,0.01877333347996076
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,40,1,128,1,float16,fp8,0,0.01806933308641116
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,40,2,128,1,float16,float16,0,0.01544533297419548
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,40,2,128,1,float16,fp8,0,0.01801066721479098
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,40,4,128,1,float16,float16,0,0.015658666690190632
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,40,4,128,1,float16,fp8,0,0.017616000026464462
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,40,8,128,1,float16,float16,0,0.015573333948850632
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,40,8,128,1,float16,fp8,0,0.0180479995906353
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,40,40,128,1,float16,fp8,0,0.01481066644191742
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,40,40,128,1,float16,float16,0,0.014090667168299357
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,40,1,128,1,float16,float16,0,0.01504533365368843
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,40,1,128,1,float16,fp8,0,0.017407999684413273
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,40,2,128,1,float16,float16,0,0.014666666587193808
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,40,2,128,1,float16,fp8,0,0.01798933371901512
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,40,4,128,1,float16,float16,0,0.015274666249752045
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,40,4,128,1,float16,fp8,0,0.017375999440749485
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,40,8,128,1,float16,float16,0,0.01492799942692121
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,40,8,128,1,float16,fp8,0,0.017952000101407368
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,40,40,128,1,float16,float16,0,0.01379199946920077
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,40,40,128,1,float16,fp8,0,0.014501333236694336
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,40,1,128,1,float16,float16,0,0.014458666245142618
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,40,1,128,1,float16,fp8,0,0.016399999459584553
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,40,4,128,1,float16,fp8,0,0.01727466657757759
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,40,2,128,1,float16,float16,0,0.014538666854302088
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,40,2,128,1,float16,fp8,0,0.016714667280515034
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,40,4,128,1,float16,float16,0,0.014458666245142618
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,40,8,128,1,float16,float16,0,0.014607999473810196
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,40,8,128,1,float16,fp8,0,0.01720000058412552
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,40,1,128,1,float16,float16,0,0.14523733655611673
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,40,1,128,1,float16,fp8,0,0.1667733391125997
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,40,2,128,1,float16,float16,0,0.1600320041179657
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,40,2,128,1,float16,fp8,0,0.17835734287897745
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,40,4,128,1,float16,float16,0,0.206661323706309
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,40,4,128,1,float16,fp8,0,0.24061866601308188
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,40,8,128,1,float16,float16,0,0.24371200799942017
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,40,40,128,1,float16,fp8,0,0.2157813310623169
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,40,40,128,1,float16,float16,0,0.21533334255218506
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,40,1,128,1,float16,float16,0,0.08267733454704285
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,40,1,128,1,float16,fp8,0,0.09338666995366414
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,40,8,128,1,float16,fp8,0,0.2881600062052409
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,40,2,128,1,float16,float16,0,0.08933867017428081
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,40,2,128,1,float16,fp8,0,0.0988213320573171
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,40,4,128,1,float16,float16,0,0.11371733744939168
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,40,4,128,1,float16,fp8,0,0.13121066490809122
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,40,40,128,1,float16,float16,0,0.11529599626859029
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,40,8,128,1,float16,fp8,0,0.15542933344841003
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,40,8,128,1,float16,float16,0,0.13006400068600973
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,40,40,128,1,float16,fp8,0,0.11658133069674174
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,40,1,128,1,float16,float16,0,0.04460266729195913
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,40,1,128,1,float16,fp8,0,0.046538665890693665
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,40,2,128,1,float16,float16,0,0.04884799818197886
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,40,2,128,1,float16,fp8,0,0.04919999837875366
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,40,4,128,1,float16,float16,0,0.06221333146095276
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,40,4,128,1,float16,fp8,0,0.06695466736952464
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,40,8,128,1,float16,float16,0,0.0728053351243337
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,40,8,128,1,float16,fp8,0,0.08514666557312012
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,40,40,128,1,float16,float16,0,0.06382933259010315
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,40,40,128,1,float16,fp8,0,0.06214933097362518
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,40,1,128,1,float16,float16,0,0.026362667481104534
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,40,1,128,1,float16,fp8,0,0.030773334205150604
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,40,2,128,1,float16,float16,0,0.02762666592995326
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,40,2,128,1,float16,fp8,0,0.032069332897663116
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,40,4,128,1,float16,float16,0,0.030373332401116688
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,40,4,128,1,float16,fp8,0,0.035743998984495796
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,40,8,128,1,float16,float16,0,0.035546667873859406
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,40,8,128,1,float16,fp8,0,0.04177066683769226
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,40,40,128,1,float16,float16,0,0.03133866687615713
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,40,40,128,1,float16,fp8,0,0.03190933416287104
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,40,4,128,1,float16,float16,0,0.02176533391078313
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,40,1,128,1,float16,float16,0,0.02033599962790807
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,40,1,128,1,float16,fp8,0,0.023872000475724537
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,40,2,128,1,float16,float16,0,0.020432000358899433
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,40,2,128,1,float16,fp8,0,0.02383466561635335
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,40,4,128,1,float16,fp8,0,0.025333332518736523
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,40,8,128,1,float16,float16,0,0.02219199885924657
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,40,8,128,1,float16,fp8,0,0.025909334421157837
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,40,40,128,1,float16,fp8,0,0.022202665607134502
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,40,40,128,1,float16,float16,0,0.021397332350413006
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,40,1,128,1,float16,float16,0,0.015050667027632395
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,40,1,128,1,float16,fp8,0,0.018016000588734944
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,40,2,128,1,float16,float16,0,0.015253332753976187
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,40,2,128,1,float16,fp8,0,0.017797333498795826
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,40,4,128,1,float16,float16,0,0.01568000018596649
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,40,4,128,1,float16,fp8,0,0.018042666216691334
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,40,8,128,1,float16,float16,0,0.01552533358335495
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,40,8,128,1,float16,fp8,0,0.018239999810854595
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,40,40,128,1,float16,float16,0,0.01714666684468587
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,40,40,128,1,float16,fp8,0,0.0182239996890227
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,40,1,128,1,float16,float16,0,0.014965333044528961
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,40,1,128,1,float16,fp8,0,0.017370666066805523
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,40,2,128,1,float16,float16,0,0.014864000181357065
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,40,2,128,1,float16,fp8,0,0.01754133279124896
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,40,4,128,1,float16,float16,0,0.014853333433469137
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,40,4,128,1,float16,fp8,0,0.017711999515692394
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,40,8,128,1,float16,float16,0,0.015119999647140503
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,40,8,128,1,float16,fp8,0,0.017711999515692394
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,40,40,128,1,float16,float16,0,0.013781332721312841
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,40,40,128,1,float16,fp8,0,0.014698666830857595
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,40,1,128,1,float16,float16,0,0.014309333016475042
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,40,1,128,1,float16,fp8,0,0.016623999923467636
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,40,2,128,1,float16,float16,0,0.014405333747466406
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,40,2,128,1,float16,fp8,0,0.01699200024207433
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,40,4,128,1,float16,float16,0,0.01481066644191742
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,40,4,128,1,float16,fp8,0,0.01710933322707812
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,40,8,128,1,float16,float16,0,0.014970666418472925
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,40,8,128,1,float16,fp8,0,0.017008000363906223
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,40,40,128,1,float16,float16,0,0.01332266628742218
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,40,40,128,1,float16,fp8,0,0.014346666634082794
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,40,1,128,1,float16,float16,0,0.013957332819700241
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,40,1,128,1,float16,fp8,0,0.016085332880417507
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,40,2,128,1,float16,float16,0,0.013839999834696451
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,40,4,128,1,float16,fp8,0,0.016037333756685257
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,40,2,128,1,float16,fp8,0,0.015930666277805965
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,40,4,128,1,float16,float16,0,0.013850666582584381
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,40,8,128,1,float16,float16,0,0.014181333283583323
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,40,8,128,1,float16,fp8,0,0.016442666451136272
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,40,1,128,1,float16,float16,0,0.08154666423797607
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,40,1,128,1,float16,fp8,0,0.09245866537094116
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,40,4,128,1,float16,float16,0,0.12759466965993246
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,40,2,128,1,float16,float16,0,0.09700266520182292
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,40,2,128,1,float16,fp8,0,0.10848533113797505
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,40,4,128,1,float16,fp8,0,0.153519997994105
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,40,8,128,1,float16,float16,0,0.1348426640033722
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,40,8,128,1,float16,fp8,0,0.1556533376375834
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,40,1,128,1,float16,float16,0,0.04347200194994608
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,40,1,128,1,float16,fp8,0,0.04619200030962626
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,40,2,128,1,float16,fp8,0,0.05671999851862589
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,40,40,128,1,float16,fp8,0,0.17805333932240805
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,40,40,128,1,float16,float16,0,0.18000533183415732
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,40,2,128,1,float16,float16,0,0.052560001611709595
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,40,4,128,1,float16,fp8,0,0.07714133461316426
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,40,8,128,1,float16,float16,0,0.07557866473992665
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,40,4,128,1,float16,float16,0,0.07069866855939229
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,40,8,128,1,float16,fp8,0,0.08409066994984944
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,40,1,128,1,float16,float16,0,0.026330667237440746
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,40,1,128,1,float16,fp8,0,0.030752000709374745
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,40,40,128,1,float16,float16,0,0.09569066762924194
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,40,2,128,1,float16,float16,0,0.027621333797772724
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,40,4,128,1,float16,float16,0,0.035504000882307686
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,40,4,128,1,float16,fp8,0,0.04200533529122671
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,40,8,128,1,float16,float16,0,0.035360001027584076
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,40,8,128,1,float16,fp8,0,0.04118400067090988
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,40,40,128,1,float16,fp8,0,0.047168001532554626
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,40,1,128,1,float16,float16,0,0.02022933339079221
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,40,40,128,1,float16,fp8,0,0.09333333373069763
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,40,40,128,1,float16,float16,0,0.04710400104522705
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,40,1,128,1,float16,fp8,0,0.02382933348417282
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,40,4,128,1,float16,float16,0,0.02197333425283432
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,40,2,128,1,float16,float16,0,0.020362666497627895
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,40,2,128,1,float16,fp8,0,0.03233599911133448
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,40,2,128,1,float16,fp8,0,0.023658665517965954
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,40,4,128,1,float16,fp8,0,0.026047999660174053
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,40,8,128,1,float16,float16,0,0.021722666919231415
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,40,8,128,1,float16,fp8,0,0.025626666843891144
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,40,40,128,1,float16,fp8,0,0.02870933214823405
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,40,40,128,1,float16,float16,0,0.02792533238728841
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,40,1,128,1,float16,float16,0,0.01481066644191742
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,40,1,128,1,float16,fp8,0,0.017914666483799618
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,40,2,128,1,float16,float16,0,0.015168000012636185
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,40,2,128,1,float16,fp8,0,0.017808000246683758
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,40,8,128,1,float16,float16,0,0.015237333873907724
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,40,8,128,1,float16,fp8,0,0.01815466706951459
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,40,40,128,1,float16,float16,0,0.020560000091791153
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,40,1,128,1,float16,float16,0,0.01470400020480156
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,40,4,128,1,float16,float16,0,0.01552533358335495
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,40,1,128,1,float16,fp8,0,0.01735466718673706
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,40,40,128,1,float16,fp8,0,0.021482666333516438
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,40,4,128,1,float16,fp8,0,0.017781333376963932
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,40,2,128,1,float16,float16,0,0.014890667051076889
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,40,2,128,1,float16,fp8,0,0.017290666699409485
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,40,40,128,1,float16,float16,0,0.01657066618402799
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,40,4,128,1,float16,float16,0,0.014778666198253632
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,40,4,128,1,float16,fp8,0,0.017258666455745697
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,40,8,128,1,float16,float16,0,0.014741333822409311
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,40,8,128,1,float16,fp8,0,0.01717866708834966
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,40,40,128,1,float16,fp8,0,0.017808000246683758
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,40,1,128,1,float16,float16,0,0.014186666657527288
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,40,1,128,1,float16,fp8,0,0.016682667036851246
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,40,2,128,1,float16,float16,0,0.01458666721979777
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,40,4,128,1,float16,float16,0,0.014677333335081736
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,40,2,128,1,float16,fp8,0,0.017184000462293625
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,40,4,128,1,float16,fp8,0,0.017594666530688603
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,40,8,128,1,float16,float16,0,0.014309333016475042
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,40,8,128,1,float16,fp8,0,0.016751999656359356
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,40,40,128,1,float16,float16,0,0.013450667262077332
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,40,4,128,1,float16,float16,0,0.01414399966597557
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,40,40,128,1,float16,fp8,0,0.014650666465361914
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,40,1,128,1,float16,float16,0,0.014021333307027817
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,40,1,128,1,float16,fp8,0,0.01618133361140887
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,40,2,128,1,float16,float16,0,0.01404800017674764
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,40,2,128,1,float16,fp8,0,0.016042667130629223
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,40,4,128,1,float16,fp8,0,0.016800000021855038
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,40,8,128,1,float16,float16,0,0.013471999516089758
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,40,8,128,1,float16,fp8,0,0.015962666521469753
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,40,40,128,1,float16,float16,0,0.01312000056107839
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,40,40,128,1,float16,fp8,0,0.01421333352724711
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,40,1,128,1,float16,float16,0,0.013728000223636627
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,40,1,128,1,float16,fp8,0,0.01579733317097028
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,40,2,128,1,float16,float16,0,0.013637332866589228
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,40,2,128,1,float16,fp8,0,0.01565333331624667
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,40,4,128,1,float16,float16,0,0.013679999858140945
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,40,4,128,1,float16,fp8,0,0.016058667252461117
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,40,8,128,1,float16,float16,0,0.013466666142145792
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,40,8,128,1,float16,fp8,0,0.015589332828919092
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,32,1,128,1,float16,fp8,0,28.106190999348957
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,32,2,128,1,float16,fp8,0,28.044703165690105
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,32,1,128,1,float16,float16,0,31.46478525797526
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,32,4,128,1,float16,fp8,0,28.676544189453125
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,32,2,128,1,float16,float16,0,31.92188771565755
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,32,4,128,1,float16,float16,0,31.517300923665363
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,32,8,128,1,float16,float16,0,32.37862904866537
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,32,32,128,1,float16,fp8,0,14.79037348429362
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,32,32,128,1,float16,float16,0,16.47093327840169
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,32,1,128,1,float16,fp8,0,13.8646608988444
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,32,1,128,1,float16,float16,0,15.667914072672525
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,32,2,128,1,float16,fp8,0,14.090187072753906
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,32,2,128,1,float16,float16,0,16.066144307454426
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,32,8,128,1,float16,fp8,0,28.7846196492513
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,32,4,128,1,float16,float16,0,15.694549560546875
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,32,32,128,1,float16,float16,0,8.171130498250326
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,32,32,128,1,float16,fp8,0,7.485845565795898
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,32,1,128,1,float16,float16,0,7.343994776407878
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,32,1,128,1,float16,fp8,0,7.04148801167806
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,32,4,128,1,float16,fp8,0,14.381402333577475
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,32,8,128,1,float16,fp8,0,14.314715067545572
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,32,2,128,1,float16,float16,0,7.413605372111003
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,32,8,128,1,float16,float16,0,16.201775868733723
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,32,32,128,1,float16,float16,0,3.824501355489095
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,32,2,128,1,float16,fp8,0,7.142165501912435
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,32,4,128,1,float16,float16,0,7.2318878173828125
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,32,32,128,1,float16,fp8,0,4.199173291524251
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,32,4,128,1,float16,fp8,0,7.464037577311198
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,32,1,128,1,float16,float16,0,3.693333307902018
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,32,8,128,1,float16,fp8,0,7.2361494700113935
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,32,8,128,1,float16,float16,0,8.002522786458334
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,32,1,128,1,float16,fp8,0,3.7403999964396157
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,32,2,128,1,float16,float16,0,3.5989761352539062
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,32,4,128,1,float16,float16,0,3.626821200052897
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,32,2,128,1,float16,fp8,0,3.8924214045206704
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,32,4,128,1,float16,fp8,0,3.725887934366862
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,32,8,128,1,float16,float16,0,3.8675947189331055
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,32,8,128,1,float16,fp8,0,3.674053192138672
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,32,1,128,1,float16,fp8,0,15.960938771565756
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,32,1,128,1,float16,float16,0,18.31393051147461
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,32,2,128,1,float16,fp8,0,16.093546549479168
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,32,2,128,1,float16,float16,0,17.986106872558594
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,32,4,128,1,float16,fp8,0,16.65066146850586
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,32,4,128,1,float16,float16,0,18.00555674235026
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,32,8,128,1,float16,fp8,0,16.44690195719401
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,32,8,128,1,float16,float16,0,19.311856587727863
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,32,32,128,1,float16,float16,0,9.471824010213217
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,32,32,128,1,float16,fp8,0,8.790682474772135
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,32,1,128,1,float16,float16,0,9.063056310017904
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,32,1,128,1,float16,fp8,0,8.245775858561197
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,32,2,128,1,float16,fp8,0,8.185173034667969
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,32,2,128,1,float16,float16,0,9.172800064086914
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,32,4,128,1,float16,float16,0,8.86520512898763
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,32,4,128,1,float16,fp8,0,8.539951960245768
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,32,32,128,1,float16,float16,0,4.609290758768718
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,32,32,128,1,float16,fp8,0,4.404085477193196
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,32,1,128,1,float16,float16,0,4.121061325073242
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,32,1,128,1,float16,fp8,0,4.156410535176595
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,32,2,128,1,float16,float16,0,4.2631839116414385
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,32,2,128,1,float16,fp8,0,4.089968045552571
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,32,8,128,1,float16,fp8,0,8.349093119303385
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,32,8,128,1,float16,float16,0,8.856762568155924
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,32,4,128,1,float16,float16,0,4.313930511474609
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,32,32,128,1,float16,fp8,0,2.2501920064290366
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,32,4,128,1,float16,fp8,0,4.268298784891765
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,32,32,128,1,float16,float16,0,2.2065866788228354
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,32,1,128,1,float16,float16,0,2.048133373260498
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,32,8,128,1,float16,fp8,0,4.254117329915364
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,32,1,128,1,float16,fp8,0,2.1945172945658364
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,32,2,128,1,float16,float16,0,2.142202695210775
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,32,8,128,1,float16,float16,0,4.213808059692383
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,32,2,128,1,float16,fp8,0,2.104346593221029
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,32,4,128,1,float16,float16,0,2.0593013763427734
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,32,4,128,1,float16,fp8,0,2.1893760363260903
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,32,8,128,1,float16,float16,0,2.1449173291524253
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,32,8,128,1,float16,fp8,0,2.1814932823181152
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,32,1,128,1,float16,fp8,0,11.271872202555338
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,32,1,128,1,float16,float16,0,12.363525390625
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,32,2,128,1,float16,float16,0,12.367973327636719
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,32,4,128,1,float16,float16,0,12.260794321695963
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,32,2,128,1,float16,fp8,0,11.261418660481771
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,32,4,128,1,float16,fp8,0,11.81341298421224
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,32,8,128,1,float16,fp8,0,11.754192352294922
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,32,8,128,1,float16,float16,0,12.96896489461263
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,32,32,128,1,float16,float16,0,6.587973276774089
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,32,1,128,1,float16,fp8,0,5.830405553181966
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,32,32,128,1,float16,fp8,0,6.214853286743164
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,32,2,128,1,float16,fp8,0,5.689162572224935
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,32,1,128,1,float16,float16,0,5.84770139058431
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,32,2,128,1,float16,float16,0,5.890960057576497
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,32,4,128,1,float16,float16,0,6.323386510213216
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,32,4,128,1,float16,fp8,0,6.233903884887695
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,32,8,128,1,float16,float16,0,6.43605359395345
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,32,32,128,1,float16,fp8,0,3.1322453816731772
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,32,1,128,1,float16,float16,0,2.872031847635905
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,32,32,128,1,float16,float16,0,3.0494667689005532
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,32,1,128,1,float16,fp8,0,2.9388694763183594
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,32,8,128,1,float16,fp8,0,5.945440292358398
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,32,2,128,1,float16,float16,0,2.9204638799031577
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,32,2,128,1,float16,fp8,0,2.908938725789388
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,32,4,128,1,float16,float16,0,2.9516000747680664
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,32,32,128,1,float16,float16,0,1.5776480038960774
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,32,4,128,1,float16,fp8,0,3.0041494369506836
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,32,8,128,1,float16,float16,0,3.0768000284830728
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,32,32,128,1,float16,fp8,0,1.7065119743347168
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,32,8,128,1,float16,fp8,0,3.029290517171224
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,32,2,128,1,float16,float16,0,1.46724271774292
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,32,1,128,1,float16,float16,0,1.4409119288126628
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,32,2,128,1,float16,fp8,0,1.4904212951660156
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,32,4,128,1,float16,float16,0,1.5038080215454102
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,32,8,128,1,float16,float16,0,1.529136021931966
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,32,4,128,1,float16,fp8,0,1.5612853368123372
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,32,1,128,1,float16,fp8,0,1.551754633585612
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,32,8,128,1,float16,fp8,0,1.5589280128479004
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,32,1,128,1,float16,fp8,0,14.577194213867188
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,32,1,128,1,float16,float16,0,16.474143981933594
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,32,2,128,1,float16,fp8,0,14.825562795003256
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,32,2,128,1,float16,float16,0,16.61025619506836
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,32,4,128,1,float16,float16,0,16.591754913330078
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,32,8,128,1,float16,float16,0,17.0117925008138
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,32,4,128,1,float16,fp8,0,15.57156753540039
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,32,32,128,1,float16,float16,0,8.693978627522787
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,32,1,128,1,float16,float16,0,8.235797246297201
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,32,1,128,1,float16,fp8,0,7.5494028727213545
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,32,32,128,1,float16,fp8,0,8.449525197347006
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,32,2,128,1,float16,float16,0,8.387834548950195
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,32,2,128,1,float16,fp8,0,7.540842692057292
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,32,8,128,1,float16,fp8,0,15.34500249226888
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,32,4,128,1,float16,float16,0,8.216058731079102
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,32,32,128,1,float16,float16,0,4.0973866780598955
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,32,1,128,1,float16,float16,0,3.7950452168782554
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,32,32,128,1,float16,fp8,0,4.481082598368327
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,32,2,128,1,float16,float16,0,3.813621203104655
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,32,4,128,1,float16,fp8,0,7.863648096720378
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,32,1,128,1,float16,fp8,0,3.6936000188191733
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,32,8,128,1,float16,float16,0,8.483407974243164
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,32,8,128,1,float16,fp8,0,7.836357116699219
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,32,4,128,1,float16,float16,0,3.8648160298665366
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,32,2,128,1,float16,fp8,0,3.899242719014486
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,32,4,128,1,float16,fp8,0,3.952725410461426
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,32,32,128,1,float16,float16,0,2.0320372581481934
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,32,1,128,1,float16,float16,0,1.8655999501546223
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,32,32,128,1,float16,fp8,0,2.2571627298990884
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,32,8,128,1,float16,float16,0,3.9768053690592446
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,32,8,128,1,float16,fp8,0,3.971973419189453
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,32,1,128,1,float16,fp8,0,2.08079465230306
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,32,2,128,1,float16,float16,0,1.9039306640625
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,32,2,128,1,float16,fp8,0,1.9201866785685222
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,32,4,128,1,float16,float16,0,1.901920000712077
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,32,4,128,1,float16,fp8,0,2.013706684112549
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,32,8,128,1,float16,float16,0,1.9792639414469402
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,32,32,128,1,float16,float16,0,1.0568426450093586
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,32,32,128,1,float16,fp8,0,1.153765360514323
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,32,8,128,1,float16,fp8,0,2.124586741129557
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,32,1,128,1,float16,float16,0,0.9607253074645996
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,32,1,128,1,float16,fp8,0,0.9882720311482748
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,32,2,128,1,float16,float16,0,1.0374240080515544
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,32,2,128,1,float16,fp8,0,0.998410701751709
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,32,4,128,1,float16,float16,0,0.985765298207601
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,32,4,128,1,float16,fp8,0,1.0503946940104167
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,32,8,128,1,float16,float16,0,1.0122453371683757
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,32,8,128,1,float16,fp8,0,1.0619680086771648
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,32,1,128,1,float16,fp8,0,8.571589152018229
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,32,1,128,1,float16,float16,0,9.575738906860352
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,32,2,128,1,float16,float16,0,9.550730387369791
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,32,2,128,1,float16,fp8,0,8.610725402832031
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,32,4,128,1,float16,float16,0,9.678634643554688
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,32,4,128,1,float16,fp8,0,9.14955202738444
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,32,8,128,1,float16,float16,0,9.69217618306478
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,32,8,128,1,float16,fp8,0,9.16325314839681
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,32,32,128,1,float16,float16,0,5.017749468485515
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,32,1,128,1,float16,float16,0,4.430186589558919
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,32,1,128,1,float16,fp8,0,4.287546793619792
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,32,32,128,1,float16,fp8,0,5.025557200113933
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,32,2,128,1,float16,float16,0,4.5643361409505205
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,32,2,128,1,float16,fp8,0,4.330426534016927
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,32,4,128,1,float16,float16,0,4.5551252365112305
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,32,4,128,1,float16,fp8,0,4.648634592692058
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,32,32,128,1,float16,float16,0,2.412463982899984
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,32,32,128,1,float16,fp8,0,2.507861296335856
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,32,1,128,1,float16,float16,0,2.1624959309895835
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,32,1,128,1,float16,fp8,0,2.166597366333008
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,32,2,128,1,float16,float16,0,2.25764799118042
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,32,8,128,1,float16,float16,0,4.68942387898763
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,32,8,128,1,float16,fp8,0,4.8865814208984375
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,32,2,128,1,float16,fp8,0,2.2190826733907065
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,32,4,128,1,float16,float16,0,2.3069492975870767
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,32,32,128,1,float16,float16,0,1.2547840277353923
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,32,4,128,1,float16,fp8,0,2.3493173917134604
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,32,8,128,1,float16,float16,0,2.3363733291625977
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,32,1,128,1,float16,float16,0,1.077893336613973
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,32,8,128,1,float16,fp8,0,2.359674612681071
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,32,32,128,1,float16,fp8,0,1.5247680346171062
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,32,1,128,1,float16,fp8,0,1.1269013086954753
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,32,2,128,1,float16,float16,0,1.1554240385691326
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,32,2,128,1,float16,fp8,0,1.164138634999593
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,32,4,128,1,float16,float16,0,1.130186637242635
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,32,4,128,1,float16,fp8,0,1.2717706362406414
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,32,8,128,1,float16,float16,0,1.1640106836954753
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,32,32,128,1,float16,float16,0,0.6484959920247396
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,32,8,128,1,float16,fp8,0,1.2170026302337646
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,32,1,128,1,float16,fp8,0,0.5910773277282715
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,32,32,128,1,float16,fp8,0,0.6707680225372314
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,32,1,128,1,float16,float16,0,0.5732800165812174
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,32,2,128,1,float16,float16,0,0.5804959932963053
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,32,2,128,1,float16,fp8,0,0.603546659151713
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,32,4,128,1,float16,float16,0,0.5964693228403727
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,32,4,128,1,float16,fp8,0,0.6308960119883219
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,32,8,128,1,float16,float16,0,0.6105600198109945
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,32,8,128,1,float16,fp8,0,0.6354293425877889
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,32,1,128,1,float16,float16,0,8.532453536987305
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,32,1,128,1,float16,fp8,0,8.115226745605469
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,32,2,128,1,float16,float16,0,8.621466954549154
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,32,2,128,1,float16,fp8,0,8.269760131835938
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,32,4,128,1,float16,float16,0,8.979850769042969
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,32,4,128,1,float16,fp8,0,8.881024042765299
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,32,8,128,1,float16,float16,0,9.60865592956543
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,32,1,128,1,float16,float16,0,3.9989547729492188
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,32,32,128,1,float16,float16,0,4.70087464650472
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,32,1,128,1,float16,fp8,0,4.215749422709147
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,32,2,128,1,float16,float16,0,4.198917388916016
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,32,32,128,1,float16,fp8,0,5.016672134399414
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,32,2,128,1,float16,fp8,0,4.241845448811849
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,32,8,128,1,float16,fp8,0,9.183818817138672
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,32,32,128,1,float16,float16,0,2.3733545939127603
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,32,32,128,1,float16,fp8,0,2.485919952392578
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,32,1,128,1,float16,float16,0,2.0085013707478843
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,32,4,128,1,float16,float16,0,4.250181198120117
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,32,4,128,1,float16,fp8,0,4.608725229899089
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,32,8,128,1,float16,float16,0,4.454319953918457
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,32,8,128,1,float16,fp8,0,4.558383941650391
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,32,1,128,1,float16,fp8,0,2.0616639455159507
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,32,2,128,1,float16,fp8,0,2.11189874013265
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,32,2,128,1,float16,float16,0,2.194021383921305
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,32,4,128,1,float16,float16,0,2.1518932978312173
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,32,32,128,1,float16,float16,0,1.1882452964782715
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,32,8,128,1,float16,float16,0,2.215936024983724
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,32,4,128,1,float16,fp8,0,2.345861275990804
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,32,32,128,1,float16,fp8,0,1.2672959963480632
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,32,8,128,1,float16,fp8,0,2.367237408955892
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,32,1,128,1,float16,float16,0,0.9978720347086588
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,32,2,128,1,float16,float16,0,1.05842129389445
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,32,4,128,1,float16,float16,0,1.0841866334279378
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,32,1,128,1,float16,fp8,0,1.0981280008951824
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,32,2,128,1,float16,fp8,0,1.0845013459523518
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,32,4,128,1,float16,fp8,0,1.1840159893035889
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,32,8,128,1,float16,float16,0,1.1179786523183186
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,32,32,128,1,float16,fp8,0,0.6658346652984619
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,32,32,128,1,float16,float16,0,0.6134186585744222
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,32,1,128,1,float16,float16,0,0.5235093434651693
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,32,8,128,1,float16,fp8,0,1.1814506848653157
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,32,1,128,1,float16,fp8,0,0.5560959974924723
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,32,2,128,1,float16,float16,0,0.5425546566645304
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,32,2,128,1,float16,fp8,0,0.5694239934285482
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,32,4,128,1,float16,float16,0,0.5585600137710571
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,32,4,128,1,float16,fp8,0,0.6067200104395548
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,32,8,128,1,float16,float16,0,0.5782560110092163
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,32,32,128,1,float16,float16,0,0.332533339659373
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,32,8,128,1,float16,fp8,0,0.6200853188832601
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,32,32,128,1,float16,fp8,0,0.35859731833140057
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,32,1,128,1,float16,float16,0,0.28099199136098224
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,32,1,128,1,float16,fp8,0,0.3001546661059062
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,32,2,128,1,float16,float16,0,0.2908906737963359
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,32,2,128,1,float16,fp8,0,0.3065386613210042
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,32,4,128,1,float16,float16,0,0.2929546634356181
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,32,4,128,1,float16,fp8,0,0.3261760075887044
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,32,8,128,1,float16,float16,0,0.31333865722020465
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,32,8,128,1,float16,fp8,0,0.33455467224121094
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,32,1,128,1,float16,float16,0,4.716224034627278
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,32,2,128,1,float16,float16,0,4.806853294372559
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,32,1,128,1,float16,fp8,0,4.911562601725261
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,32,2,128,1,float16,fp8,0,4.9919999440511065
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,32,4,128,1,float16,float16,0,5.185381253560384
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,32,4,128,1,float16,fp8,0,5.429168065388997
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,32,8,128,1,float16,float16,0,5.4591623942057295
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,32,8,128,1,float16,fp8,0,5.449525197347005
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,32,1,128,1,float16,float16,0,2.3317813873291016
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,32,32,128,1,float16,fp8,0,3.2190879185994468
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,32,32,128,1,float16,float16,0,2.849850654602051
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,32,1,128,1,float16,fp8,0,2.4581759770711265
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,32,2,128,1,float16,float16,0,2.4917972882588706
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,32,2,128,1,float16,fp8,0,2.5148800214131675
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,32,4,128,1,float16,float16,0,2.657024065653483
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,32,4,128,1,float16,fp8,0,2.742981274922689
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,32,32,128,1,float16,float16,0,1.443295955657959
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,32,8,128,1,float16,float16,0,2.6561226844787598
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,32,1,128,1,float16,float16,0,1.1762879689534504
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,32,32,128,1,float16,fp8,0,1.5682346026102703
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,32,8,128,1,float16,fp8,0,2.780282656351725
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,32,1,128,1,float16,fp8,0,1.2495840390523274
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,32,2,128,1,float16,float16,0,1.2438506285349529
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,32,2,128,1,float16,fp8,0,1.2702986399332683
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,32,4,128,1,float16,float16,0,1.2864480018615723
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,32,4,128,1,float16,fp8,0,1.515610694885254
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,32,32,128,1,float16,fp8,0,0.8016106287638346
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,32,8,128,1,float16,float16,0,1.3400479952494304
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,32,32,128,1,float16,float16,0,0.7432160377502441
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,32,1,128,1,float16,fp8,0,0.6405760049819946
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,32,8,128,1,float16,fp8,0,1.4134987195332844
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,32,1,128,1,float16,float16,0,0.6078879833221436
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,32,2,128,1,float16,float16,0,0.630624016125997
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,32,2,128,1,float16,fp8,0,0.6781813303629557
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,32,4,128,1,float16,float16,0,0.66539200146993
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,32,8,128,1,float16,fp8,0,0.7349759737650553
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,32,4,128,1,float16,fp8,0,0.7257653077443441
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,32,8,128,1,float16,float16,0,0.6889333724975586
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,32,32,128,1,float16,float16,0,0.3898880084355672
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,32,32,128,1,float16,fp8,0,0.42666133244832355
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,32,1,128,1,float16,float16,0,0.3233013351758321
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,32,1,128,1,float16,fp8,0,0.3426133394241333
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,32,2,128,1,float16,float16,0,0.33340267340342206
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,32,2,128,1,float16,fp8,0,0.3534080187479655
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,32,4,128,1,float16,float16,0,0.3452106714248657
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,32,4,128,1,float16,fp8,0,0.3838026523590088
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,32,8,128,1,float16,float16,0,0.36236266295115155
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,32,8,128,1,float16,fp8,0,0.3941813309987386
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,32,32,128,1,float16,float16,0,0.21457600593566895
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,32,32,128,1,float16,fp8,0,0.2374026576677958
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,32,1,128,1,float16,float16,0,0.1746399998664856
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,32,1,128,1,float16,fp8,0,0.18685867389043173
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,32,2,128,1,float16,float16,0,0.1830400029818217
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,32,2,128,1,float16,fp8,0,0.1950613260269165
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,32,4,128,1,float16,float16,0,0.18773865699768066
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,32,4,128,1,float16,fp8,0,0.20336000124613443
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,32,8,128,1,float16,float16,0,0.20169599850972494
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,32,8,128,1,float16,fp8,0,0.2193333307902018
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,32,1,128,1,float16,float16,0,4.511173248291016
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,32,2,128,1,float16,float16,0,4.771493275960286
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,32,1,128,1,float16,fp8,0,4.865013440450032
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,32,4,128,1,float16,float16,0,5.189813296000163
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,32,2,128,1,float16,fp8,0,5.039600054423015
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,32,4,128,1,float16,fp8,0,5.517509460449219
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,32,8,128,1,float16,float16,0,5.37934939066569
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,32,32,128,1,float16,float16,0,2.9995787938435874
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,32,1,128,1,float16,float16,0,2.2787680625915527
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,32,1,128,1,float16,fp8,0,2.4776426951090493
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,32,2,128,1,float16,float16,0,2.359429359436035
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,32,2,128,1,float16,fp8,0,2.486186663309733
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,32,4,128,1,float16,float16,0,2.61135466893514
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,32,32,128,1,float16,fp8,0,3.296997388203939
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,32,8,128,1,float16,float16,0,2.728581428527832
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,32,4,128,1,float16,fp8,0,2.854175885518392
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,32,1,128,1,float16,float16,0,1.163599967956543
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,32,32,128,1,float16,float16,0,1.5059733390808105
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,32,1,128,1,float16,fp8,0,1.250874678293864
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,32,32,128,1,float16,fp8,0,1.6374932924906414
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,32,8,128,1,float16,fp8,0,2.8851359685262046
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,32,8,128,1,float16,fp8,0,5.580981572469075
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,32,2,128,1,float16,float16,0,1.21997865041097
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,32,4,128,1,float16,float16,0,1.3003146648406982
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,32,4,128,1,float16,fp8,0,1.4384586016337078
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,32,8,128,1,float16,float16,0,1.3623520533243816
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,32,32,128,1,float16,float16,0,0.7672213713328043
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,32,2,128,1,float16,fp8,0,1.279642661412557
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,32,8,128,1,float16,fp8,0,1.4569600423177083
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,32,1,128,1,float16,float16,0,0.5863999923070272
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,32,1,128,1,float16,fp8,0,0.6362773180007935
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,32,32,128,1,float16,fp8,0,0.84169602394104
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,32,2,128,1,float16,fp8,0,0.6585546731948853
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,32,2,128,1,float16,float16,0,0.6190880139668783
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,32,4,128,1,float16,float16,0,0.6712426344553629
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,32,4,128,1,float16,fp8,0,0.7351413567860922
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,32,8,128,1,float16,float16,0,0.6954560279846191
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,32,32,128,1,float16,float16,0,0.39551464716593426
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,32,8,128,1,float16,fp8,0,0.7476960023244222
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,32,32,128,1,float16,fp8,0,0.43825066089630127
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,32,1,128,1,float16,float16,0,0.3088586727778117
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,32,1,128,1,float16,fp8,0,0.3381013472874959
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,32,2,128,1,float16,float16,0,0.3260800043741862
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,32,2,128,1,float16,fp8,0,0.3482186794281006
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,32,4,128,1,float16,float16,0,0.34753600756327313
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,32,4,128,1,float16,fp8,0,0.38707200686136883
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,32,8,128,1,float16,float16,0,0.36538132031758624
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,32,8,128,1,float16,fp8,0,0.3999679883321126
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,32,32,128,1,float16,float16,0,0.21392534176508585
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,32,32,128,1,float16,fp8,0,0.23756800095240274
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,32,1,128,1,float16,float16,0,0.16692266861597696
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,32,1,128,1,float16,fp8,0,0.18480533361434937
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,32,2,128,1,float16,fp8,0,0.19341866175333658
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,32,2,128,1,float16,float16,0,0.17638399203618368
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,32,4,128,1,float16,float16,0,0.18961066007614136
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,32,4,128,1,float16,fp8,0,0.20989867051442465
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,32,8,128,1,float16,float16,0,0.20004266500473022
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,32,8,128,1,float16,fp8,0,0.21918932596842447
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,32,32,128,1,float16,float16,0,0.12167466680208842
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,32,32,128,1,float16,fp8,0,0.13714667161305746
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,32,1,128,1,float16,float16,0,0.09047999978065491
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,32,1,128,1,float16,fp8,0,0.0990666647752126
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,32,2,128,1,float16,float16,0,0.09664533535639445
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,32,2,128,1,float16,fp8,0,0.10502400000890096
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,32,4,128,1,float16,float16,0,0.10168000062306722
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,32,4,128,1,float16,fp8,0,0.11352533102035522
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,32,8,128,1,float16,float16,0,0.11459733049074809
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,32,8,128,1,float16,fp8,0,0.1221386690934499
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,32,1,128,1,float16,float16,0,2.763392130533854
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,32,1,128,1,float16,fp8,0,3.0169973373413086
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,32,2,128,1,float16,float16,0,2.9191360473632812
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,32,4,128,1,float16,float16,0,3.228853225708008
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,32,2,128,1,float16,fp8,0,3.1269706090291343
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,32,4,128,1,float16,fp8,0,3.5728480021158853
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,32,8,128,1,float16,float16,0,3.4082187016805015
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,32,32,128,1,float16,float16,0,1.9286452929178874
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,32,8,128,1,float16,fp8,0,3.5883572896321616
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,32,1,128,1,float16,float16,0,1.3836906750996907
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,32,32,128,1,float16,fp8,0,2.2028586069742837
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,32,1,128,1,float16,fp8,0,1.5326293309529622
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,32,2,128,1,float16,float16,0,1.4823466936747234
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,32,4,128,1,float16,float16,0,1.646224021911621
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,32,2,128,1,float16,fp8,0,1.6570080121358235
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,32,4,128,1,float16,fp8,0,1.7983946800231934
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,32,8,128,1,float16,float16,0,1.704869270324707
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,32,32,128,1,float16,float16,0,0.9648160139719645
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,32,32,128,1,float16,fp8,0,1.1007306575775146
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,32,8,128,1,float16,fp8,0,1.8743252754211426
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,32,1,128,1,float16,float16,0,0.7111946741739908
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,32,1,128,1,float16,fp8,0,0.7849653561909994
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,32,2,128,1,float16,float16,0,0.7586186726888021
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,32,2,128,1,float16,fp8,0,0.8040640354156494
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,32,4,128,1,float16,float16,0,0.8258559703826904
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,32,4,128,1,float16,fp8,0,0.9336640040079752
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,32,8,128,1,float16,float16,0,0.8781387011210123
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,32,32,128,1,float16,float16,0,0.4996853272120158
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,32,32,128,1,float16,fp8,0,0.5617760022481283
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,32,8,128,1,float16,fp8,0,0.963759978612264
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,32,1,128,1,float16,float16,0,0.36698134740193683
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,32,1,128,1,float16,fp8,0,0.40700801213582355
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,32,2,128,1,float16,fp8,0,0.4225226640701294
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,32,2,128,1,float16,float16,0,0.3946026563644409
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,32,4,128,1,float16,fp8,0,0.47862398624420166
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,32,4,128,1,float16,float16,0,0.43197333812713623
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,32,8,128,1,float16,float16,0,0.45281068483988446
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,32,8,128,1,float16,fp8,0,0.4954186677932739
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,32,32,128,1,float16,float16,0,0.2640373309453328
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,32,32,128,1,float16,fp8,0,0.29969600836435956
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,32,1,128,1,float16,float16,0,0.19773866732915243
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,32,1,128,1,float16,fp8,0,0.21726399660110474
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,32,2,128,1,float16,float16,0,0.21478400627772012
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,32,8,128,1,float16,fp8,0,0.2667466600735982
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,32,2,128,1,float16,fp8,0,0.22706133127212524
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,32,4,128,1,float16,float16,0,0.23158933718999228
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,32,4,128,1,float16,fp8,0,0.2558186650276184
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,32,8,128,1,float16,float16,0,0.2413919965426127
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,32,32,128,1,float16,float16,0,0.1455680032571157
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,32,32,128,1,float16,fp8,0,0.16461867094039917
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,32,1,128,1,float16,float16,0,0.11085333426793416
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,32,1,128,1,float16,fp8,0,0.12127466996510823
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,32,2,128,1,float16,float16,0,0.11916266878445943
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,32,2,128,1,float16,fp8,0,0.12572266658147177
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,32,4,128,1,float16,float16,0,0.11991999546686809
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,32,4,128,1,float16,fp8,0,0.13593066732088724
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,32,8,128,1,float16,float16,0,0.13364799817403158
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,32,8,128,1,float16,fp8,0,0.1469386617342631
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,32,32,128,1,float16,float16,0,0.0855466624101003
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,32,32,128,1,float16,fp8,0,0.09885332981745402
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,32,1,128,1,float16,float16,0,0.06027733286221822
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,32,1,128,1,float16,fp8,0,0.06758933266003926
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,32,2,128,1,float16,float16,0,0.06397866706053416
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,32,2,128,1,float16,fp8,0,0.07189333438873291
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,32,4,128,1,float16,float16,0,0.06874133149782817
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,32,4,128,1,float16,fp8,0,0.07902400195598602
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,32,8,128,1,float16,float16,0,0.07580799857775371
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,32,8,128,1,float16,fp8,0,0.08123733103275299
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,32,1,128,1,float16,float16,0,2.8836800257364907
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,32,1,128,1,float16,fp8,0,3.2257493336995444
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,32,2,128,1,float16,float16,0,3.0967785517374673
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,32,8,128,1,float16,float16,0,3.735306739807129
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,32,32,128,1,float16,float16,0,2.1441386540730796
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,32,4,128,1,float16,float16,0,3.49727471669515
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,32,8,128,1,float16,fp8,0,4.085237185160319
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,32,1,128,1,float16,float16,0,1.4972373644510906
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,32,4,128,1,float16,fp8,0,3.9137067794799805
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,32,2,128,1,float16,fp8,0,3.374389330546061
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,32,32,128,1,float16,fp8,0,2.503173351287842
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,32,1,128,1,float16,fp8,0,1.6287946701049805
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,32,2,128,1,float16,float16,0,1.5623092651367188
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,32,2,128,1,float16,fp8,0,1.7034667332967122
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,32,4,128,1,float16,float16,0,1.7824640274047852
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,32,32,128,1,float16,float16,0,1.097925345102946
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,32,4,128,1,float16,fp8,0,2.00273068745931
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,32,32,128,1,float16,fp8,0,1.2633972962697346
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,32,1,128,1,float16,float16,0,0.7455786863962809
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,32,8,128,1,float16,float16,0,1.8633653322855632
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,32,8,128,1,float16,fp8,0,2.080560048421224
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,32,1,128,1,float16,fp8,0,0.8341226577758789
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,32,2,128,1,float16,float16,0,0.8127840360005697
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,32,4,128,1,float16,float16,0,0.9002666473388672
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,32,4,128,1,float16,fp8,0,1.0122506618499756
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,32,8,128,1,float16,float16,0,0.9616266886393229
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,32,8,128,1,float16,fp8,0,1.038805325826009
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,32,2,128,1,float16,fp8,0,0.8650240103403727
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,32,32,128,1,float16,float16,0,0.5489333470662435
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,32,1,128,1,float16,float16,0,0.3835413455963135
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,32,32,128,1,float16,fp8,0,0.6504160165786743
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,32,1,128,1,float16,fp8,0,0.4309120178222656
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,32,2,128,1,float16,float16,0,0.4169866641362508
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,32,2,128,1,float16,fp8,0,0.4497493505477905
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,32,4,128,1,float16,float16,0,0.46244267622629803
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,32,4,128,1,float16,fp8,0,0.5262026786804199
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,32,8,128,1,float16,float16,0,0.4862080017725627
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,32,32,128,1,float16,float16,0,0.29028799136479694
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,32,8,128,1,float16,fp8,0,0.5427733262379965
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,32,32,128,1,float16,fp8,0,0.3392373323440552
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,32,1,128,1,float16,float16,0,0.20324265956878662
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,32,1,128,1,float16,fp8,0,0.227674663066864
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,32,2,128,1,float16,float16,0,0.2246826688448588
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,32,2,128,1,float16,fp8,0,0.2392266591389974
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,32,4,128,1,float16,fp8,0,0.275546669960022
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,32,8,128,1,float16,float16,0,0.2593173384666443
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,32,4,128,1,float16,float16,0,0.24706665674845377
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,32,8,128,1,float16,fp8,0,0.2894879976908366
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,32,32,128,1,float16,float16,0,0.15621333320935568
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,32,32,128,1,float16,fp8,0,0.18243199586868286
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,32,1,128,1,float16,float16,0,0.11166399717330933
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,32,2,128,1,float16,fp8,0,0.13505066434542337
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,32,1,128,1,float16,fp8,0,0.12771200140317282
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,32,2,128,1,float16,float16,0,0.12281066179275513
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,32,4,128,1,float16,float16,0,0.1330400009950002
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,32,4,128,1,float16,fp8,0,0.1509813368320465
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,32,8,128,1,float16,float16,0,0.14058666427930197
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,32,8,128,1,float16,fp8,0,0.16065067052841187
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,32,32,128,1,float16,float16,0,0.08966933687527974
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,32,32,128,1,float16,fp8,0,0.10558399558067322
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,32,2,128,1,float16,fp8,0,0.0747573326031367
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,32,1,128,1,float16,float16,0,0.062128002444903054
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,32,1,128,1,float16,fp8,0,0.07037333150704701
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,32,2,128,1,float16,float16,0,0.06771733363469441
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,32,4,128,1,float16,float16,0,0.06936533252398173
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,32,4,128,1,float16,fp8,0,0.08145600060621898
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,32,8,128,1,float16,float16,0,0.0804746647675832
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,32,8,128,1,float16,fp8,0,0.08801600337028503
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,32,32,128,1,float16,float16,0,0.05343466500441233
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,32,32,128,1,float16,fp8,0,0.0628053347269694
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,32,1,128,1,float16,float16,0,0.03588266670703888
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,32,1,128,1,float16,fp8,0,0.042954668402671814
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,32,2,128,1,float16,fp8,0,0.047279998660087585
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,32,2,128,1,float16,float16,0,0.04160533348719279
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,32,8,128,1,float16,fp8,0,0.05131733417510986
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,32,4,128,1,float16,float16,0,0.043280000487963356
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,32,4,128,1,float16,fp8,0,0.050666665037473045
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,32,8,128,1,float16,float16,0,0.04452266792456309
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,32,1,128,1,float16,float16,0,2.1443732579549155
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,32,1,128,1,float16,fp8,0,2.44486935933431
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,32,2,128,1,float16,fp8,0,2.602565288543701
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,32,2,128,1,float16,float16,0,2.3207573890686035
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,32,4,128,1,float16,fp8,0,3.1535679499308267
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,32,1,128,1,float16,float16,0,1.0790026982625325
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,32,32,128,1,float16,float16,0,1.7417707443237305
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,32,1,128,1,float16,fp8,0,1.2442879676818848
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,32,32,128,1,float16,fp8,0,2.2126612663269043
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,32,2,128,1,float16,fp8,0,1.3134666283925374
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,32,2,128,1,float16,float16,0,1.182960033416748
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,32,4,128,1,float16,float16,0,2.77020263671875
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,32,4,128,1,float16,fp8,0,1.6136587460835774
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,32,32,128,1,float16,float16,0,0.8769066333770752
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,32,4,128,1,float16,float16,0,1.3895039558410645
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,32,8,128,1,float16,fp8,0,1.6925493876139324
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,32,1,128,1,float16,float16,0,0.5549066861470541
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,32,8,128,1,float16,float16,0,1.4750720659891765
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,32,1,128,1,float16,fp8,0,0.6307733456293741
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,32,2,128,1,float16,float16,0,0.621392011642456
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,32,32,128,1,float16,fp8,0,1.1246986389160156
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,32,8,128,1,float16,fp8,0,3.3159891764322915
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,32,8,128,1,float16,float16,0,2.985285441080729
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,32,4,128,1,float16,float16,0,0.7074613571166992
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,32,32,128,1,float16,float16,0,0.4559359947840373
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,32,1,128,1,float16,fp8,0,0.3307573397954305
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,32,2,128,1,float16,fp8,0,0.6721706390380859
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,32,8,128,1,float16,fp8,0,0.8760053316752116
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,32,8,128,1,float16,float16,0,0.7574826876322428
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,32,2,128,1,float16,fp8,0,0.3506186803181966
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,32,4,128,1,float16,fp8,0,0.824677308400472
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,32,4,128,1,float16,float16,0,0.36557332674662274
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,32,2,128,1,float16,float16,0,0.32338132460912067
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,32,8,128,1,float16,float16,0,0.38571735223134357
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,32,4,128,1,float16,fp8,0,0.42898666858673096
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,32,1,128,1,float16,float16,0,0.15455999970436096
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,32,32,128,1,float16,fp8,0,0.29627732435862225
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,32,32,128,1,float16,float16,0,0.2366186579068502
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,32,8,128,1,float16,fp8,0,0.44948267936706543
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,32,1,128,1,float16,fp8,0,0.17655466000239053
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,32,2,128,1,float16,float16,0,0.17259732882181802
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,32,2,128,1,float16,fp8,0,0.18971200784047446
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,32,4,128,1,float16,float16,0,0.1911733349164327
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,32,4,128,1,float16,fp8,0,0.22636799017588297
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,32,8,128,1,float16,float16,0,0.20611733198165894
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,32,32,128,1,float16,float16,0,0.12794666488965353
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,32,1,128,1,float16,float16,0,0.08628799517949422
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,32,32,128,1,float16,fp8,0,0.1627679963906606
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,32,32,128,1,float16,fp8,0,0.5708320140838623
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,32,8,128,1,float16,fp8,0,0.23914666970570883
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,32,1,128,1,float16,fp8,0,0.10042666395505269
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,32,1,128,1,float16,float16,0,0.2885333299636841
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,32,2,128,1,float16,float16,0,0.09480533003807068
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,32,2,128,1,float16,fp8,0,0.10642133156458537
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,32,4,128,1,float16,float16,0,0.10455999771753947
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,32,4,128,1,float16,fp8,0,0.11980799833933513
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,32,8,128,1,float16,float16,0,0.1112000048160553
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,32,8,128,1,float16,fp8,0,0.13142399986584982
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,32,32,128,1,float16,float16,0,0.07317333420117696
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,32,32,128,1,float16,fp8,0,0.0926026701927185
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,32,1,128,1,float16,float16,0,0.0483893354733785
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,32,1,128,1,float16,fp8,0,0.05499733487764994
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,32,2,128,1,float16,float16,0,0.051738664507865906
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,32,2,128,1,float16,fp8,0,0.05823466678460439
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,32,4,128,1,float16,float16,0,0.05492799977461497
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,32,4,128,1,float16,fp8,0,0.06363733112812042
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,32,8,128,1,float16,float16,0,0.06428266565004985
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,32,8,128,1,float16,fp8,0,0.06980800131956737
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,32,32,128,1,float16,float16,0,0.04258666435877482
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,32,32,128,1,float16,fp8,0,0.050154666105906166
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,32,1,128,1,float16,float16,0,0.030282666285832722
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,32,1,128,1,float16,fp8,0,0.03531199942032496
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,32,4,128,1,float16,fp8,0,0.03910933434963226
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,32,2,128,1,float16,float16,0,0.031221332649389904
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,32,2,128,1,float16,fp8,0,0.036559998989105225
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,32,4,128,1,float16,float16,0,0.03363733241955439
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,32,8,128,1,float16,float16,0,0.03383466601371765
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,32,8,128,1,float16,fp8,0,0.03965866565704346
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,32,32,128,1,float16,float16,0,0.02367466688156128
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,32,32,128,1,float16,fp8,0,0.029157333076000214
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,32,1,128,1,float16,float16,0,0.019978666057189304
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,32,1,128,1,float16,fp8,0,0.023141334454218548
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,32,2,128,1,float16,float16,0,0.020010666300853092
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,32,2,128,1,float16,fp8,0,0.02366400013367335
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,32,4,128,1,float16,float16,0,0.02176533391078313
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,32,4,128,1,float16,fp8,0,0.025578667720158894
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,32,8,128,1,float16,float16,0,0.02183466653029124
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,32,8,128,1,float16,fp8,0,0.025861332813898723
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,32,1,128,1,float16,float16,0,0.897536039352417
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,32,1,128,1,float16,fp8,0,1.036911964416504
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,32,2,128,1,float16,float16,0,1.0061279932657878
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,32,2,128,1,float16,fp8,0,1.1224266688028972
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,32,4,128,1,float16,float16,0,1.2089333534240723
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,32,32,128,1,float16,float16,0,0.7908480167388916
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,32,4,128,1,float16,fp8,0,1.4238133430480957
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,32,8,128,1,float16,float16,0,1.2957706451416016
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,32,1,128,1,float16,float16,0,0.45661334196726483
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,32,32,128,1,float16,fp8,0,0.9683199723561605
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,32,1,128,1,float16,fp8,0,0.5305546522140503
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,32,2,128,1,float16,float16,0,0.5237013498942057
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,32,2,128,1,float16,fp8,0,0.5774666468302408
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,32,8,128,1,float16,fp8,0,1.5215892791748047
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,32,4,128,1,float16,float16,0,0.6128213405609131
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,32,8,128,1,float16,float16,0,0.6645386616388956
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,32,4,128,1,float16,fp8,0,0.7267999649047852
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,32,32,128,1,float16,float16,0,0.4045279820760091
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,32,1,128,1,float16,float16,0,0.24026666084925333
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,32,32,128,1,float16,fp8,0,0.4930560191472371
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,32,8,128,1,float16,fp8,0,0.7810186545054117
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,32,2,128,1,float16,float16,0,0.2675679922103882
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,32,4,128,1,float16,float16,0,0.3195733428001404
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,32,1,128,1,float16,fp8,0,0.27948800722757977
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,32,2,128,1,float16,fp8,0,0.2998986641565959
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,32,4,128,1,float16,fp8,0,0.3771359920501709
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,32,8,128,1,float16,fp8,0,0.39641066392262775
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,32,32,128,1,float16,fp8,0,0.26048000653584796
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,32,1,128,1,float16,float16,0,0.13005866607030234
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,32,32,128,1,float16,float16,0,0.21220266819000244
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,32,1,128,1,float16,fp8,0,0.150218665599823
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,32,2,128,1,float16,float16,0,0.14480533202489218
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,32,2,128,1,float16,fp8,0,0.16236266493797302
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,32,8,128,1,float16,float16,0,0.3419040044148763
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,32,4,128,1,float16,float16,0,0.1674720048904419
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,32,4,128,1,float16,fp8,0,0.2000746726989746
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,32,8,128,1,float16,float16,0,0.18258132537206015
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,32,32,128,1,float16,float16,0,0.1160586675008138
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,32,8,128,1,float16,fp8,0,0.21686400969823202
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,32,32,128,1,float16,fp8,0,0.1413333316644033
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,32,1,128,1,float16,float16,0,0.0732479989528656
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,32,1,128,1,float16,fp8,0,0.08533866206804912
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,32,2,128,1,float16,float16,0,0.08116266628106435
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,32,2,128,1,float16,fp8,0,0.0928106705347697
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,32,4,128,1,float16,float16,0,0.09051199754079182
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,32,4,128,1,float16,fp8,0,0.1093280017375946
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,32,8,128,1,float16,float16,0,0.09885332981745402
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,32,8,128,1,float16,fp8,0,0.11812800168991089
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,32,32,128,1,float16,float16,0,0.06596266726652782
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,32,32,128,1,float16,fp8,0,0.08184533317883809
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,32,1,128,1,float16,float16,0,0.039877332746982574
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,32,1,128,1,float16,fp8,0,0.04774933556715647
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,32,2,128,1,float16,float16,0,0.0423573354880015
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,32,2,128,1,float16,fp8,0,0.05026666820049286
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,32,4,128,1,float16,float16,0,0.04602666695912679
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,32,4,128,1,float16,fp8,0,0.05412266651789347
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,32,8,128,1,float16,float16,0,0.055904000997543335
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,32,8,128,1,float16,fp8,0,0.06265600025653839
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,32,32,128,1,float16,float16,0,0.03772799919048945
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,32,32,128,1,float16,fp8,0,0.045935998360315956
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,32,1,128,1,float16,float16,0,0.024720000723997753
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,32,1,128,1,float16,fp8,0,0.029535998900731403
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,32,2,128,1,float16,float16,0,0.025685332715511322
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,32,2,128,1,float16,fp8,0,0.030432000756263733
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,32,4,128,1,float16,float16,0,0.0281333327293396
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,32,4,128,1,float16,fp8,0,0.03335466732581457
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,32,8,128,1,float16,float16,0,0.028560000161329906
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,32,8,128,1,float16,fp8,0,0.034474665919939675
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,32,32,128,1,float16,float16,0,0.020234666764736176
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,32,32,128,1,float16,fp8,0,0.025802666942278545
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,32,1,128,1,float16,float16,0,0.01714133347074191
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,32,1,128,1,float16,fp8,0,0.01988799994190534
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,32,2,128,1,float16,float16,0,0.017498667041460674
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,32,2,128,1,float16,fp8,0,0.020186666399240494
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,32,4,128,1,float16,float16,0,0.018874666343132656
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,32,4,128,1,float16,fp8,0,0.02162133405605952
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,32,8,128,1,float16,float16,0,0.018874666343132656
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,32,8,128,1,float16,fp8,0,0.022218666970729828
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,32,32,128,1,float16,float16,0,0.017792000124851864
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,32,2,128,1,float16,float16,0,0.016666666915019352
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,32,32,128,1,float16,fp8,0,0.02319466571013133
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,32,1,128,1,float16,float16,0,0.01661866654952367
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,32,1,128,1,float16,fp8,0,0.019472000499566395
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,32,2,128,1,float16,fp8,0,0.01932799940307935
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,32,4,128,1,float16,float16,0,0.01693333312869072
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,32,4,128,1,float16,fp8,0,0.01916266605257988
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,32,8,128,1,float16,float16,0,0.017125333348910015
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,32,8,128,1,float16,fp8,0,0.020367999871571858
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,32,1,128,1,float16,float16,0,0.42550400892893475
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,32,1,128,1,float16,fp8,0,0.4724533160527547
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,32,2,128,1,float16,float16,0,0.47888000806172687
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,32,2,128,1,float16,fp8,0,0.5178186496098837
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,32,8,128,1,float16,fp8,0,0.7286880016326904
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,32,32,128,1,float16,float16,0,0.3840159972508748
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,32,1,128,1,float16,float16,0,0.22406933705012003
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,32,4,128,1,float16,float16,0,0.5694666703542074
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,32,4,128,1,float16,fp8,0,0.64083198706309
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,32,1,128,1,float16,fp8,0,0.25340267022450763
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,32,8,128,1,float16,float16,0,0.6361333529154459
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,32,4,128,1,float16,float16,0,0.29360532760620117
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,32,2,128,1,float16,float16,0,0.25542932748794556
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,32,32,128,1,float16,fp8,0,0.228928009668986
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,32,8,128,1,float16,fp8,0,0.37964268525441486
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,32,1,128,1,float16,float16,0,0.1220960021018982
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,32,2,128,1,float16,fp8,0,0.27197333176930744
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,32,8,128,1,float16,float16,0,0.32791467507680255
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,32,32,128,1,float16,float16,0,0.2019946575164795
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,32,2,128,1,float16,fp8,0,0.149536003669103
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,32,4,128,1,float16,fp8,0,0.33644266923268634
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,32,2,128,1,float16,float16,0,0.137855996688207
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,32,4,128,1,float16,float16,0,0.1551199952761332
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,32,1,128,1,float16,fp8,0,0.13876266280810037
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,32,32,128,1,float16,fp8,0,0.42476268609364826
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,32,4,128,1,float16,fp8,0,0.18022932608922324
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,32,8,128,1,float16,fp8,0,0.20268267393112183
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,32,32,128,1,float16,fp8,0,0.1244533360004425
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,32,32,128,1,float16,float16,0,0.11023466785748799
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,32,8,128,1,float16,float16,0,0.17446933190027872
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,32,1,128,1,float16,float16,0,0.06950933237870534
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,32,1,128,1,float16,fp8,0,0.07898666461308797
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,32,2,128,1,float16,fp8,0,0.08426666259765625
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,32,4,128,1,float16,float16,0,0.08390399813652039
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,32,2,128,1,float16,float16,0,0.07843199868996938
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,32,4,128,1,float16,fp8,0,0.09804800152778625
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,32,8,128,1,float16,float16,0,0.09593066573143005
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,32,32,128,1,float16,float16,0,0.06302933394908905
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,32,8,128,1,float16,fp8,0,0.11058132847150166
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,32,32,128,1,float16,fp8,0,0.06937600175539653
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,32,1,128,1,float16,float16,0,0.03488533447186152
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,32,1,128,1,float16,fp8,0,0.03923733284076055
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,32,4,128,1,float16,fp8,0,0.048250665267308555
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,32,2,128,1,float16,float16,0,0.03851199895143509
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,32,2,128,1,float16,fp8,0,0.04244266450405121
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,32,4,128,1,float16,float16,0,0.04127466678619385
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,32,8,128,1,float16,float16,0,0.05346133311589559
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,32,8,128,1,float16,fp8,0,0.055386667450269066
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,32,32,128,1,float16,float16,0,0.03541333228349686
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,32,32,128,1,float16,fp8,0,0.034154665966828666
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,32,1,128,1,float16,float16,0,0.021503999829292297
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,32,1,128,1,float16,fp8,0,0.025077333052953083
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,32,2,128,1,float16,float16,0,0.022730665902296703
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,32,2,128,1,float16,fp8,0,0.026357332865397137
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,32,4,128,1,float16,float16,0,0.02516799916823705
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,32,4,128,1,float16,fp8,0,0.028965334097544353
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,32,8,128,1,float16,float16,0,0.025706666211287182
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,32,8,128,1,float16,fp8,0,0.02956266701221466
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,32,32,128,1,float16,float16,0,0.018800000349680584
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,32,32,128,1,float16,fp8,0,0.021551998953024547
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,32,1,128,1,float16,float16,0,0.015557333827018738
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,32,1,128,1,float16,fp8,0,0.018229333062966663
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,32,2,128,1,float16,float16,0,0.016010666886965435
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,32,2,128,1,float16,fp8,0,0.018357332795858383
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,32,4,128,1,float16,float16,0,0.017338667064905167
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,32,4,128,1,float16,fp8,0,0.01947733387351036
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,32,8,128,1,float16,float16,0,0.017269333203633625
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,32,8,128,1,float16,fp8,0,0.02019199977318446
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,32,32,128,1,float16,float16,0,0.01603200038274129
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,32,32,128,1,float16,fp8,0,0.018250666558742523
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,32,1,128,1,float16,float16,0,0.015008000036080679
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,32,1,128,1,float16,fp8,0,0.017525333911180496
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,32,2,128,1,float16,float16,0,0.015450666348139444
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,32,2,128,1,float16,fp8,0,0.017418666432301205
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,32,4,128,1,float16,float16,0,0.015498666713635126
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,32,4,128,1,float16,fp8,0,0.01785600061217944
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,32,8,128,1,float16,float16,0,0.015664000064134598
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,32,8,128,1,float16,fp8,0,0.01764800027012825
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,32,32,128,1,float16,float16,0,0.015168000012636185
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,32,32,128,1,float16,fp8,0,0.018042666216691334
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,32,4,128,1,float16,float16,0,0.01479999969402949
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,32,1,128,1,float16,float16,0,0.014778666198253632
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,32,1,128,1,float16,fp8,0,0.017263999829689663
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,32,2,128,1,float16,float16,0,0.014837333311637243
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,32,2,128,1,float16,fp8,0,0.017701332767804463
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,32,4,128,1,float16,fp8,0,0.01687466725707054
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,32,8,128,1,float16,float16,0,0.014906667172908783
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,32,8,128,1,float16,fp8,0,0.01754133279124896
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,32,1,128,1,float16,float16,0,0.2207253376642863
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,32,1,128,1,float16,fp8,0,0.25174399216969806
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,32,2,128,1,float16,float16,0,0.25546665986378986
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,32,2,128,1,float16,fp8,0,0.26582932472229004
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,32,4,128,1,float16,float16,0,0.2899786631266276
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,32,4,128,1,float16,fp8,0,0.33721065521240234
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,32,32,128,1,float16,float16,0,0.22879467407862344
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,32,32,128,1,float16,fp8,0,0.2242400050163269
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,32,8,128,1,float16,fp8,0,0.37514666716257733
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,32,8,128,1,float16,float16,0,0.32602665821711224
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,32,1,128,1,float16,float16,0,0.11994133392969768
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,32,2,128,1,float16,float16,0,0.1368000010649363
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,32,4,128,1,float16,float16,0,0.15425599614779154
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,32,8,128,1,float16,float16,0,0.17334399620691934
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,32,1,128,1,float16,fp8,0,0.1395093301932017
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,32,32,128,1,float16,fp8,0,0.12124799688657124
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,32,1,128,1,float16,float16,0,0.07019199927647908
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,32,8,128,1,float16,fp8,0,0.20060799519220987
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,32,32,128,1,float16,float16,0,0.1221386690934499
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,32,2,128,1,float16,fp8,0,0.15037866433461508
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,32,4,128,1,float16,fp8,0,0.17918399969736734
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,32,2,128,1,float16,float16,0,0.07773866752783458
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,32,4,128,1,float16,float16,0,0.08349333206812541
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,32,2,128,1,float16,fp8,0,0.08521599570910136
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,32,1,128,1,float16,fp8,0,0.0791786660750707
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,32,8,128,1,float16,fp8,0,0.1088800032933553
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,32,32,128,1,float16,fp8,0,0.06695466736952464
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,32,32,128,1,float16,float16,0,0.06898133456707001
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,32,1,128,1,float16,float16,0,0.03456533451875051
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,32,8,128,1,float16,float16,0,0.09638399879137675
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,32,1,128,1,float16,fp8,0,0.03948266555865606
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,32,4,128,1,float16,fp8,0,0.09843200445175171
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,32,2,128,1,float16,float16,0,0.03755199909210205
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,32,8,128,1,float16,float16,0,0.05266133447488149
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,32,2,128,1,float16,fp8,0,0.042021334171295166
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,32,4,128,1,float16,float16,0,0.040805332362651825
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,32,4,128,1,float16,fp8,0,0.04814933240413666
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,32,8,128,1,float16,fp8,0,0.05643199880917867
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,32,32,128,1,float16,float16,0,0.0388373335202535
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,32,32,128,1,float16,fp8,0,0.034501334031422935
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,32,1,128,1,float16,float16,0,0.021301334102948506
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,32,1,128,1,float16,fp8,0,0.025173333783944447
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,32,2,128,1,float16,float16,0,0.022522665560245514
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,32,2,128,1,float16,fp8,0,0.026357332865397137
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,32,4,128,1,float16,float16,0,0.025360000630219776
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,32,4,128,1,float16,fp8,0,0.029205332199732464
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,32,8,128,1,float16,float16,0,0.025434667865435284
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,32,8,128,1,float16,fp8,0,0.029839999973773956
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,32,32,128,1,float16,float16,0,0.020256000260512035
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,32,32,128,1,float16,fp8,0,0.020954666038354237
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,32,4,128,1,float16,float16,0,0.017114666601022083
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,32,1,128,1,float16,float16,0,0.015520000209410986
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,32,1,128,1,float16,fp8,0,0.018112000077962875
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,32,2,128,1,float16,float16,0,0.015754666179418564
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,32,2,128,1,float16,fp8,0,0.018346666047970455
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,32,4,128,1,float16,fp8,0,0.019909333437681198
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,32,8,128,1,float16,float16,0,0.017173333714405697
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,32,8,128,1,float16,fp8,0,0.01998399943113327
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,32,32,128,1,float16,float16,0,0.014581333845853806
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,32,1,128,1,float16,float16,0,0.014789332946141561
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,32,32,128,1,float16,fp8,0,0.014757333944241205
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,32,1,128,1,float16,fp8,0,0.01782400036851565
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,32,2,128,1,float16,float16,0,0.015029333531856537
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,32,2,128,1,float16,fp8,0,0.01807466646035512
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,32,4,128,1,float16,float16,0,0.015130666395028433
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,32,4,128,1,float16,fp8,0,0.017386666188637417
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,32,8,128,1,float16,float16,0,0.015365333606799444
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,32,8,128,1,float16,fp8,0,0.017605333278576534
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,32,32,128,1,float16,float16,0,0.01313599944114685
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,32,32,128,1,float16,fp8,0,0.01394133393963178
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,32,4,128,1,float16,float16,0,0.015184000134468079
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,32,1,128,1,float16,float16,0,0.015098666151364645
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,32,1,128,1,float16,fp8,0,0.017349333812793095
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,32,2,128,1,float16,float16,0,0.015013333410024643
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,32,2,128,1,float16,fp8,0,0.017994667092959087
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,32,4,128,1,float16,fp8,0,0.016741332908471424
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,32,8,128,1,float16,float16,0,0.014869333555301031
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,32,8,128,1,float16,fp8,0,0.017535999417304993
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,32,32,128,1,float16,float16,0,0.013023999830087027
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,32,32,128,1,float16,fp8,0,0.013525333255529404
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,32,1,128,1,float16,float16,0,0.014111999422311783
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,32,1,128,1,float16,fp8,0,0.01616000011563301
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,32,2,128,1,float16,float16,0,0.01392000044385592
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,32,2,128,1,float16,fp8,0,0.01605333387851715
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,32,4,128,1,float16,float16,0,0.014554666976133982
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,32,4,128,1,float16,fp8,0,0.016021333634853363
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,32,8,128,1,float16,float16,0,0.014240000396966934
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,32,8,128,1,float16,fp8,0,0.01724799970785777
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,32,1,128,1,float16,float16,0,0.11922132968902588
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,32,1,128,1,float16,fp8,0,0.13961600263913473
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,32,2,128,1,float16,float16,0,0.13544533650080362
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,32,2,128,1,float16,fp8,0,0.14944000045458475
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,32,4,128,1,float16,float16,0,0.15377066532770792
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,32,4,128,1,float16,fp8,0,0.17854400475819907
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,32,32,128,1,float16,float16,0,0.17269867658615112
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,32,8,128,1,float16,float16,0,0.1716853380203247
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,32,32,128,1,float16,fp8,0,0.1688213348388672
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,32,8,128,1,float16,fp8,0,0.20082666476567587
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,32,1,128,1,float16,float16,0,0.06894933183987935
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,32,1,128,1,float16,fp8,0,0.07796800136566162
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,32,2,128,1,float16,float16,0,0.07725333174069722
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,32,4,128,1,float16,float16,0,0.08317333459854126
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,32,2,128,1,float16,fp8,0,0.08557867010434468
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,32,4,128,1,float16,fp8,0,0.09770666559537251
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,32,8,128,1,float16,float16,0,0.09532266855239868
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,32,32,128,1,float16,float16,0,0.09319466352462769
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,32,8,128,1,float16,fp8,0,0.10889599720637004
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,32,32,128,1,float16,fp8,0,0.09168000022570293
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,32,1,128,1,float16,float16,0,0.03443733354409536
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,32,1,128,1,float16,fp8,0,0.04001600046952566
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,32,2,128,1,float16,float16,0,0.039477333426475525
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,32,2,128,1,float16,fp8,0,0.04248533149560293
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,32,4,128,1,float16,float16,0,0.04112533231576284
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,32,4,128,1,float16,fp8,0,0.04794666667779287
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,32,8,128,1,float16,float16,0,0.05308799942334493
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,32,8,128,1,float16,fp8,0,0.053898667295773826
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,32,32,128,1,float16,float16,0,0.05134400228659312
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,32,32,128,1,float16,fp8,0,0.04764799773693085
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,32,1,128,1,float16,float16,0,0.021317332983016968
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,32,1,128,1,float16,fp8,0,0.02478933334350586
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,32,2,128,1,float16,float16,0,0.022410665949185688
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,32,2,128,1,float16,fp8,0,0.026159999271233875
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,32,4,128,1,float16,float16,0,0.024874667326609295
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,32,4,128,1,float16,fp8,0,0.029285334050655365
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,32,8,128,1,float16,float16,0,0.0249439999461174
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,32,8,128,1,float16,fp8,0,0.02959466725587845
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,32,32,128,1,float16,float16,0,0.026421333352724712
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,32,32,128,1,float16,fp8,0,0.027285332481066387
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,32,1,128,1,float16,float16,0,0.015541333705186844
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,32,1,128,1,float16,fp8,0,0.018181333939234417
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,32,2,128,1,float16,float16,0,0.01570133368174235
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,32,2,128,1,float16,fp8,0,0.018288000176350277
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,32,4,128,1,float16,float16,0,0.016970666746298473
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,32,4,128,1,float16,fp8,0,0.01966399947802226
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,32,8,128,1,float16,float16,0,0.016965333372354507
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,32,8,128,1,float16,fp8,0,0.020015999674797058
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,32,32,128,1,float16,float16,0,0.01777600000301997
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,32,32,128,1,float16,fp8,0,0.01850133389234543
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,32,1,128,1,float16,float16,0,0.014954666296641031
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,32,1,128,1,float16,fp8,0,0.017535999417304993
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,32,2,128,1,float16,float16,0,0.015189333508412043
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,32,4,128,1,float16,float16,0,0.015130666395028433
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,32,4,128,1,float16,fp8,0,0.017759999881188076
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,32,2,128,1,float16,fp8,0,0.017866666118303936
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,32,8,128,1,float16,float16,0,0.015141333142916361
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,32,8,128,1,float16,fp8,0,0.017935999979575474
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,32,32,128,1,float16,float16,0,0.013461332768201828
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,32,32,128,1,float16,fp8,0,0.014325333138306936
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,32,1,128,1,float16,float16,0,0.014495999862750372
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,32,1,128,1,float16,fp8,0,0.017322666943073273
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,32,2,128,1,float16,float16,0,0.014560000350077948
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,32,2,128,1,float16,fp8,0,0.017317333569129307
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,32,4,128,1,float16,float16,0,0.014618666221698126
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,32,4,128,1,float16,fp8,0,0.01711999997496605
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,32,8,128,1,float16,float16,0,0.014741333822409311
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,32,8,128,1,float16,fp8,0,0.017397332936525345
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,32,32,128,1,float16,float16,0,0.013077333569526672
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,32,32,128,1,float16,fp8,0,0.013663999736309052
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,32,1,128,1,float16,float16,0,0.013888000200192133
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,32,1,128,1,float16,fp8,0,0.01600533351302147
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,32,2,128,1,float16,float16,0,0.01403733342885971
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,32,2,128,1,float16,fp8,0,0.01624533285697301
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,32,4,128,1,float16,float16,0,0.014463999619086584
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,32,4,128,1,float16,fp8,0,0.016773333152135212
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,32,8,128,1,float16,float16,0,0.013983999689420065
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,32,8,128,1,float16,fp8,0,0.01621333385507266
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,32,32,128,1,float16,float16,0,0.01267733300725619
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,32,32,128,1,float16,fp8,0,0.013295999417702356
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,32,1,128,1,float16,float16,0,0.013967999567588171
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,32,1,128,1,float16,fp8,0,0.01613866661985715
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,32,2,128,1,float16,float16,0,0.013839999834696451
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,32,2,128,1,float16,fp8,0,0.01603200038274129
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,32,4,128,1,float16,float16,0,0.013653332988421122
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,32,4,128,1,float16,fp8,0,0.015728000551462173
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,32,8,128,1,float16,float16,0,0.013760000467300415
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,32,8,128,1,float16,fp8,0,0.01584533353646596
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,32,1,128,1,float16,float16,0,0.06916266679763794
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,32,1,128,1,float16,fp8,0,0.07860266665617625
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,32,2,128,1,float16,float16,0,0.07807999849319458
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,32,2,128,1,float16,fp8,0,0.08583466211954753
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,32,4,128,1,float16,float16,0,0.08225066463152568
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,32,4,128,1,float16,fp8,0,0.09774933258692424
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,32,32,128,1,float16,float16,0,0.14458133776982626
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,32,8,128,1,float16,float16,0,0.10443733135859172
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,32,32,128,1,float16,fp8,0,0.14146133263905844
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,32,8,128,1,float16,fp8,0,0.10674666364987691
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,32,1,128,1,float16,float16,0,0.03531199942032496
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,32,1,128,1,float16,fp8,0,0.03941866755485535
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,32,2,128,1,float16,float16,0,0.03856533269087473
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,32,2,128,1,float16,fp8,0,0.042165334026018776
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,32,4,128,1,float16,float16,0,0.0408693328499794
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,32,32,128,1,float16,float16,0,0.07738666733105977
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,32,4,128,1,float16,fp8,0,0.04748799900213877
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,32,8,128,1,float16,float16,0,0.05861866474151611
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,32,8,128,1,float16,fp8,0,0.05308799942334493
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,32,32,128,1,float16,fp8,0,0.07148266832033794
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,32,1,128,1,float16,float16,0,0.021477334201335907
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,32,1,128,1,float16,fp8,0,0.024864000578721363
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,32,2,128,1,float16,float16,0,0.02252800017595291
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,32,2,128,1,float16,fp8,0,0.02619733413060506
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,32,4,128,1,float16,float16,0,0.024720000723997753
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,32,4,128,1,float16,fp8,0,0.028565332293510437
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,32,8,128,1,float16,float16,0,0.02845866729815801
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,32,8,128,1,float16,fp8,0,0.029391999046007793
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,32,32,128,1,float16,float16,0,0.03930133332808813
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,32,32,128,1,float16,fp8,0,0.03922666609287262
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,32,1,128,1,float16,float16,0,0.015530666957298914
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,32,1,128,1,float16,fp8,0,0.018144000321626663
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,32,2,128,1,float16,float16,0,0.01570133368174235
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,32,2,128,1,float16,fp8,0,0.018266666680574417
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,32,4,128,1,float16,float16,0,0.01674666628241539
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,32,4,128,1,float16,fp8,0,0.01945066700379054
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,32,8,128,1,float16,float16,0,0.01854933301607768
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,32,8,128,1,float16,fp8,0,0.0195573332409064
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,32,32,128,1,float16,float16,0,0.02439466615517934
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,32,32,128,1,float16,fp8,0,0.02459733436505
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,32,1,128,1,float16,float16,0,0.014922666052977243
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,32,1,128,1,float16,fp8,0,0.017317333569129307
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,32,2,128,1,float16,float16,0,0.015189333508412043
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,32,2,128,1,float16,fp8,0,0.01749333366751671
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,32,4,128,1,float16,float16,0,0.015344000111023584
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,32,4,128,1,float16,fp8,0,0.01757866640885671
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,32,8,128,1,float16,float16,0,0.01351999988158544
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,32,8,128,1,float16,fp8,0,0.014208000153303146
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,32,1,128,1,float16,fp8,0,0.017797333498795826
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,32,32,128,1,float16,float16,0,0.017077332983414333
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,32,32,128,1,float16,fp8,0,0.017840000490347546
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,32,1,128,1,float16,float16,0,0.01471466695268949
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,32,2,128,1,float16,float16,0,0.014730667074521383
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,32,4,128,1,float16,float16,0,0.014346666634082794
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,32,2,128,1,float16,fp8,0,0.017840000490347546
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,32,4,128,1,float16,fp8,0,0.016810666769742966
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,32,8,128,1,float16,float16,0,0.013189333180586496
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,32,8,128,1,float16,fp8,0,0.013770667215188345
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,32,32,128,1,float16,float16,0,0.013349333157142004
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,32,32,128,1,float16,fp8,0,0.013999999811251959
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,32,1,128,1,float16,float16,0,0.014127999544143677
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,32,1,128,1,float16,fp8,0,0.01621866722901662
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,32,2,128,1,float16,float16,0,0.01413333291808764
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,32,2,128,1,float16,fp8,0,0.016250666230916977
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,32,4,128,1,float16,float16,0,0.013679999858140945
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,32,4,128,1,float16,fp8,0,0.015791999797026317
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,32,8,128,1,float16,float16,0,0.01313599944114685
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,32,8,128,1,float16,fp8,0,0.013365333278973898
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,32,32,128,1,float16,float16,0,0.01309866706530253
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,32,1,128,1,float16,float16,0,0.013839999834696451
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,32,32,128,1,float16,fp8,0,0.01393066719174385
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,32,1,128,1,float16,fp8,0,0.01609066625436147
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,32,2,128,1,float16,float16,0,0.013760000467300415
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,32,2,128,1,float16,fp8,0,0.015919999529918034
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,32,4,128,1,float16,float16,0,0.013552000125249227
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,32,4,128,1,float16,fp8,0,0.015696000307798386
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,32,8,128,1,float16,float16,0,0.012154666086037954
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,32,8,128,1,float16,fp8,0,0.01257066677014033
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,32,32,128,1,float16,float16,0,0.012602667013804117
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,32,32,128,1,float16,fp8,0,0.013386666774749756
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,32,1,128,1,float16,float16,0,0.013493333011865616
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,32,1,128,1,float16,fp8,0,0.015504000087579092
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,32,2,128,1,float16,float16,0,0.01370666672786077
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,32,2,128,1,float16,fp8,0,0.01573866605758667
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,32,4,128,1,float16,float16,0,0.013424000392357508
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,32,4,128,1,float16,fp8,0,0.01525866612792015
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,32,8,128,1,float16,float16,0,0.011866666376590729
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,32,8,128,1,float16,fp8,0,0.012432000289360682
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,24,1,128,1,float16,fp8,0,20.921780904134113
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,24,2,128,1,float16,fp8,0,21.188186645507812
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,24,1,128,1,float16,float16,0,23.779398600260418
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,24,2,128,1,float16,float16,0,23.873392740885418
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,24,4,128,1,float16,fp8,0,21.8463617960612
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,24,8,128,1,float16,fp8,0,21.989237467447918
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,24,4,128,1,float16,float16,0,24.185630798339844
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,24,8,128,1,float16,float16,0,24.556437174479168
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,24,24,128,1,float16,float16,0,12.503035227457682
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,24,24,128,1,float16,fp8,0,11.290261586507162
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,24,1,128,1,float16,float16,0,11.964682261149088
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,24,1,128,1,float16,fp8,0,10.593173344930014
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,24,2,128,1,float16,float16,0,11.944608052571615
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,24,2,128,1,float16,fp8,0,10.60696029663086
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,24,4,128,1,float16,float16,0,12.039957682291666
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,24,4,128,1,float16,fp8,0,11.097892761230469
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,24,24,128,1,float16,float16,0,6.025818506876628
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,24,24,128,1,float16,fp8,0,5.715461095174153
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,24,1,128,1,float16,fp8,0,5.330650647481282
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,24,1,128,1,float16,float16,0,5.756325403849284
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,24,2,128,1,float16,float16,0,5.763557434082031
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,24,2,128,1,float16,fp8,0,5.362010955810547
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,24,8,128,1,float16,fp8,0,11.042789459228516
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,24,8,128,1,float16,float16,0,12.207579294840494
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,24,24,128,1,float16,float16,0,2.9005438486735025
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,24,4,128,1,float16,float16,0,5.9591623942057295
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,24,24,128,1,float16,fp8,0,2.9258025487264
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,24,4,128,1,float16,fp8,0,5.558581034342448
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,24,8,128,1,float16,float16,0,5.880858739217122
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,24,1,128,1,float16,float16,0,2.7174240748087564
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,24,8,128,1,float16,fp8,0,5.78329594930013
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,24,1,128,1,float16,fp8,0,3.1093759536743164
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,24,2,128,1,float16,float16,0,2.789658546447754
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,24,2,128,1,float16,fp8,0,2.8968639373779297
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,24,4,128,1,float16,float16,0,2.8217652638753257
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,24,8,128,1,float16,float16,0,2.8548692067464194
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,24,4,128,1,float16,fp8,0,3.1762612660725913
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,24,8,128,1,float16,fp8,0,2.8632853825887046
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,24,1,128,1,float16,fp8,0,12.086612701416016
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,24,1,128,1,float16,float16,0,13.406794230143229
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,24,2,128,1,float16,fp8,0,12.192340850830078
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,24,2,128,1,float16,float16,0,13.7467892964681
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,24,4,128,1,float16,float16,0,14.265183766682943
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,24,4,128,1,float16,fp8,0,12.63650639851888
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,24,8,128,1,float16,float16,0,14.203360239664713
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,24,1,128,1,float16,float16,0,6.509775797526042
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,24,1,128,1,float16,fp8,0,6.234111785888672
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,24,24,128,1,float16,float16,0,7.061365127563477
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,24,2,128,1,float16,fp8,0,6.122853597005208
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,24,2,128,1,float16,float16,0,6.816426595052083
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,24,24,128,1,float16,fp8,0,6.584501266479492
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,24,4,128,1,float16,float16,0,6.913658777872722
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,24,8,128,1,float16,fp8,0,12.67242177327474
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,24,24,128,1,float16,float16,0,3.3260958989461265
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,24,24,128,1,float16,fp8,0,3.575018564860026
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,24,1,128,1,float16,float16,0,3.0326401392618814
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,24,8,128,1,float16,float16,0,6.924922943115234
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,24,4,128,1,float16,fp8,0,6.771029154459636
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,24,1,128,1,float16,fp8,0,3.287141482035319
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,24,8,128,1,float16,fp8,0,6.533871968587239
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,24,2,128,1,float16,float16,0,3.0391359329223633
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,24,2,128,1,float16,fp8,0,3.188847859700521
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,24,4,128,1,float16,float16,0,3.232874552408854
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,24,24,128,1,float16,float16,0,1.7093332608540852
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,24,4,128,1,float16,fp8,0,3.40609073638916
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,24,8,128,1,float16,float16,0,3.2558933893839517
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,24,1,128,1,float16,float16,0,1.5698026021321614
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,24,24,128,1,float16,fp8,0,1.7451999982198079
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,24,8,128,1,float16,fp8,0,3.2841545740763345
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,24,1,128,1,float16,fp8,0,1.5932693481445312
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,24,2,128,1,float16,float16,0,1.7773173650105794
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,24,2,128,1,float16,fp8,0,1.6698719660441081
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,24,4,128,1,float16,float16,0,1.6333759625752766
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,24,4,128,1,float16,fp8,0,1.7434345881144206
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,24,8,128,1,float16,float16,0,1.6300853093465169
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,24,8,128,1,float16,fp8,0,1.694719950358073
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,24,1,128,1,float16,float16,0,9.541610717773438
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,24,1,128,1,float16,fp8,0,8.478293100992838
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,24,2,128,1,float16,float16,0,9.530197143554688
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,24,4,128,1,float16,float16,0,9.886149088541666
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,24,4,128,1,float16,fp8,0,9.03377596537272
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,24,2,128,1,float16,fp8,0,8.57375462849935
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,24,8,128,1,float16,float16,0,9.999258677164713
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,24,8,128,1,float16,fp8,0,9.052719751993815
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,24,24,128,1,float16,float16,0,4.893973350524902
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,24,1,128,1,float16,float16,0,4.504469235738118
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,24,24,128,1,float16,fp8,0,4.764255841573079
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,24,2,128,1,float16,float16,0,4.406074523925781
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,24,1,128,1,float16,fp8,0,4.556480089823405
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,24,2,128,1,float16,fp8,0,4.338917414347331
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,24,4,128,1,float16,float16,0,4.585328102111816
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,24,4,128,1,float16,fp8,0,4.545221328735352
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,24,24,128,1,float16,float16,0,2.3827412923177085
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,24,1,128,1,float16,float16,0,2.1464319229125977
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,24,1,128,1,float16,fp8,0,2.190080006917318
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,24,24,128,1,float16,fp8,0,2.6070399284362793
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,24,8,128,1,float16,float16,0,4.563253402709961
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,24,8,128,1,float16,fp8,0,4.722768147786458
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,24,2,128,1,float16,float16,0,2.2213333447774253
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,24,2,128,1,float16,fp8,0,2.1942240397135415
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,24,4,128,1,float16,float16,0,2.29748805363973
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,24,4,128,1,float16,fp8,0,2.337941328684489
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,24,24,128,1,float16,float16,0,1.2451306978861492
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,24,8,128,1,float16,float16,0,2.3000853856404624
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,24,24,128,1,float16,fp8,0,1.2648053169250488
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,24,8,128,1,float16,fp8,0,2.3448425928751626
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,24,1,128,1,float16,float16,0,1.0964852968851726
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,24,1,128,1,float16,fp8,0,1.1265386740366619
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,24,2,128,1,float16,float16,0,1.144704023996989
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,24,2,128,1,float16,fp8,0,1.1551573276519775
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,24,4,128,1,float16,float16,0,1.2124640146891277
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,24,4,128,1,float16,fp8,0,1.250170628229777
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,24,8,128,1,float16,float16,0,1.1947147051493328
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,24,8,128,1,float16,fp8,0,1.2182559967041016
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,24,1,128,1,float16,fp8,0,11.000746409098307
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,24,1,128,1,float16,float16,0,12.130874633789062
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,24,2,128,1,float16,fp8,0,11.207247416178385
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,24,2,128,1,float16,float16,0,12.613909403483072
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,24,4,128,1,float16,float16,0,13.02609634399414
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,24,4,128,1,float16,fp8,0,11.91153081258138
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,24,8,128,1,float16,fp8,0,11.988117218017578
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,24,8,128,1,float16,float16,0,13.252928415934244
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,24,24,128,1,float16,float16,0,6.474751790364583
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,24,1,128,1,float16,float16,0,5.551589330037435
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,24,24,128,1,float16,fp8,0,6.363477071126302
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,24,1,128,1,float16,fp8,0,5.66590944925944
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,24,2,128,1,float16,float16,0,5.5942026774088545
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,24,2,128,1,float16,fp8,0,5.637151718139648
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,24,4,128,1,float16,float16,0,6.415514628092448
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,24,4,128,1,float16,fp8,0,6.095093409220378
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,24,24,128,1,float16,float16,0,3.0762561162312827
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,24,24,128,1,float16,fp8,0,3.2041120529174805
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,24,1,128,1,float16,float16,0,2.854586601257324
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,24,8,128,1,float16,float16,0,5.947669347127278
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,24,1,128,1,float16,fp8,0,3.4053332010904946
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,24,2,128,1,float16,float16,0,2.866304079691569
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,24,8,128,1,float16,fp8,0,6.283744176228841
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,24,2,128,1,float16,fp8,0,3.0090560913085938
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,24,4,128,1,float16,float16,0,2.988800048828125
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,24,4,128,1,float16,fp8,0,3.1879145304361978
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,24,24,128,1,float16,float16,0,1.6026825904846191
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,24,24,128,1,float16,fp8,0,1.6460426648457844
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,24,8,128,1,float16,float16,0,3.028357187906901
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,24,8,128,1,float16,fp8,0,3.0930347442626953
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,24,1,128,1,float16,float16,0,1.3989653587341309
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,24,1,128,1,float16,fp8,0,1.5382720629374187
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,24,2,128,1,float16,float16,0,1.4488800366719563
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,24,2,128,1,float16,fp8,0,1.4723787307739258
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,24,4,128,1,float16,float16,0,1.5130240122477214
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,24,4,128,1,float16,fp8,0,1.6310720443725586
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,24,8,128,1,float16,float16,0,1.539946715037028
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,24,24,128,1,float16,float16,0,0.8305280208587646
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,24,8,128,1,float16,fp8,0,1.5846986770629883
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,24,1,128,1,float16,float16,0,0.7336586316426595
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,24,24,128,1,float16,fp8,0,0.8689280351003011
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,24,1,128,1,float16,fp8,0,0.7645440101623535
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,24,2,128,1,float16,float16,0,0.7523787021636963
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,24,2,128,1,float16,fp8,0,0.7724800109863281
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,24,4,128,1,float16,float16,0,0.7858400344848633
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,24,4,128,1,float16,fp8,0,0.8279946645100912
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,24,8,128,1,float16,float16,0,0.8041653633117676
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,24,8,128,1,float16,fp8,0,0.8439359664916992
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,24,1,128,1,float16,float16,0,6.431674957275391
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,24,1,128,1,float16,fp8,0,6.454373041788737
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,24,2,128,1,float16,float16,0,7.098906834920247
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,24,2,128,1,float16,fp8,0,6.577322642008464
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,24,4,128,1,float16,float16,0,6.869871775309245
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,24,4,128,1,float16,fp8,0,7.138794581095378
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,24,8,128,1,float16,float16,0,7.552618662516276
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,24,8,128,1,float16,fp8,0,7.042234420776367
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,24,24,128,1,float16,float16,0,3.691722551981608
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,24,1,128,1,float16,float16,0,3.364922523498535
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,24,24,128,1,float16,fp8,0,3.82203737894694
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,24,1,128,1,float16,fp8,0,3.2639147440592446
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,24,2,128,1,float16,float16,0,3.3017279307047525
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,24,2,128,1,float16,fp8,0,3.324080149332682
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,24,4,128,1,float16,float16,0,3.5674613316853843
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,24,24,128,1,float16,float16,0,1.8429439862569172
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,24,1,128,1,float16,float16,0,1.641599973042806
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,24,24,128,1,float16,fp8,0,1.9489760398864746
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,24,4,128,1,float16,fp8,0,3.5611785252889
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,24,8,128,1,float16,float16,0,3.540634791056315
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,24,1,128,1,float16,fp8,0,1.6550240516662598
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,24,8,128,1,float16,fp8,0,3.7610880533854165
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,24,2,128,1,float16,float16,0,1.6850132942199707
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,24,2,128,1,float16,fp8,0,1.7112266222635906
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,24,4,128,1,float16,float16,0,1.7591840426127117
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,24,4,128,1,float16,fp8,0,1.8407467206319172
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,24,8,128,1,float16,float16,0,1.784645398457845
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,24,24,128,1,float16,float16,0,0.9654186566670736
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,24,24,128,1,float16,fp8,0,1.010864019393921
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,24,1,128,1,float16,float16,0,0.8386826515197754
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,24,8,128,1,float16,fp8,0,1.9043679237365723
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,24,1,128,1,float16,fp8,0,0.8669013182322184
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,24,2,128,1,float16,float16,0,0.8525333404541016
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,24,2,128,1,float16,fp8,0,0.8806666533152262
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,24,4,128,1,float16,float16,0,0.9029920101165771
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,24,4,128,1,float16,fp8,0,0.9405653476715088
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,24,8,128,1,float16,float16,0,0.9244053363800049
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,24,8,128,1,float16,fp8,0,0.9697279930114746
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,24,24,128,1,float16,float16,0,0.5056693156560262
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,24,24,128,1,float16,fp8,0,0.5406506856282552
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,24,1,128,1,float16,float16,0,0.4408426682154338
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,24,1,128,1,float16,fp8,0,0.4559893210728963
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,24,2,128,1,float16,float16,0,0.4482666651407878
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,24,2,128,1,float16,fp8,0,0.46568532784779865
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,24,4,128,1,float16,float16,0,0.4782346487045288
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,24,4,128,1,float16,fp8,0,0.5084906816482544
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,24,8,128,1,float16,float16,0,0.48419201374053955
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,24,8,128,1,float16,fp8,0,0.5179893175760905
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,24,1,128,1,float16,float16,0,6.476005554199219
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,24,1,128,1,float16,fp8,0,6.095600128173828
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,24,2,128,1,float16,float16,0,6.6750132242838545
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,24,2,128,1,float16,fp8,0,6.199685414632161
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,24,4,128,1,float16,float16,0,7.148687998453776
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,24,4,128,1,float16,fp8,0,7.027242660522461
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,24,8,128,1,float16,float16,0,7.2074025472005205
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,24,8,128,1,float16,fp8,0,7.07807985941569
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,24,24,128,1,float16,float16,0,3.587162653605143
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,24,1,128,1,float16,float16,0,3.0205281575520835
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,24,24,128,1,float16,fp8,0,3.808346748352051
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,24,1,128,1,float16,fp8,0,3.0548534393310547
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,24,2,128,1,float16,float16,0,3.301722526550293
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,24,2,128,1,float16,fp8,0,3.1700798670450845
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,24,4,128,1,float16,float16,0,3.4405441284179688
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,24,4,128,1,float16,fp8,0,3.5146560668945312
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,24,24,128,1,float16,float16,0,1.8337599436442058
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,24,1,128,1,float16,float16,0,1.5340906778971355
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,24,24,128,1,float16,fp8,0,1.9223039944966633
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,24,1,128,1,float16,fp8,0,1.5751466751098633
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,24,8,128,1,float16,float16,0,3.4051361083984375
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,24,2,128,1,float16,float16,0,1.580399990081787
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,24,8,128,1,float16,fp8,0,3.6820265452067056
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,24,2,128,1,float16,fp8,0,1.5960373878479004
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,24,4,128,1,float16,float16,0,1.7070239384969075
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,24,4,128,1,float16,fp8,0,1.786688009897868
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,24,8,128,1,float16,float16,0,1.7330986658732097
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,24,8,128,1,float16,fp8,0,1.857866605122884
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,24,24,128,1,float16,float16,0,0.9400160312652588
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,24,1,128,1,float16,float16,0,0.7659520308176676
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,24,24,128,1,float16,fp8,0,0.9990506966908773
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,24,1,128,1,float16,fp8,0,0.8061652978261312
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,24,2,128,1,float16,float16,0,0.8066240151723226
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,24,2,128,1,float16,fp8,0,0.8251146475474039
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,24,4,128,1,float16,float16,0,0.8639893531799316
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,24,8,128,1,float16,fp8,0,0.9379786650339762
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,24,8,128,1,float16,float16,0,0.8753493626912435
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,24,4,128,1,float16,fp8,0,0.9454987049102783
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,24,24,128,1,float16,float16,0,0.48712531725565594
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,24,24,128,1,float16,fp8,0,0.5219093163808187
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,24,1,128,1,float16,float16,0,0.40355201562245685
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,24,1,128,1,float16,fp8,0,0.42989333470662433
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,24,2,128,1,float16,float16,0,0.4208586613337199
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,24,2,128,1,float16,fp8,0,0.43592532475789386
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,24,4,128,1,float16,float16,0,0.4567573467890422
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,24,4,128,1,float16,fp8,0,0.48932798703511554
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,24,8,128,1,float16,float16,0,0.46377066771189374
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,24,8,128,1,float16,fp8,0,0.5005706548690796
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,24,24,128,1,float16,float16,0,0.26682132482528687
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,24,1,128,1,float16,float16,0,0.21805866559346518
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,24,24,128,1,float16,fp8,0,0.2897546688715617
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,24,1,128,1,float16,fp8,0,0.2330026626586914
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,24,2,128,1,float16,float16,0,0.2288586695988973
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,24,4,128,1,float16,fp8,0,0.2705120046933492
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,24,2,128,1,float16,fp8,0,0.2398293415705363
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,24,4,128,1,float16,float16,0,0.25008533398310345
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,24,8,128,1,float16,float16,0,0.252346674601237
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,24,8,128,1,float16,fp8,0,0.2752319971720378
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,24,1,128,1,float16,float16,0,3.5352373123168945
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,24,1,128,1,float16,fp8,0,3.6659199396769204
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,24,2,128,1,float16,float16,0,3.6744747161865234
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,24,2,128,1,float16,fp8,0,3.776362737019857
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,24,4,128,1,float16,float16,0,4.188768068949382
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,24,4,128,1,float16,fp8,0,4.300421396891276
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,24,8,128,1,float16,float16,0,4.14193598429362
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,24,24,128,1,float16,float16,0,2.2790719668070474
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,24,1,128,1,float16,float16,0,1.8441386222839355
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,24,1,128,1,float16,fp8,0,1.8547786076863606
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,24,24,128,1,float16,fp8,0,2.415173371632894
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,24,2,128,1,float16,float16,0,1.893514633178711
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,24,2,128,1,float16,fp8,0,1.9132320086161296
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,24,4,128,1,float16,float16,0,2.0808587074279785
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,24,24,128,1,float16,float16,0,1.1447359720865886
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,24,8,128,1,float16,float16,0,2.0770773887634277
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,24,4,128,1,float16,fp8,0,2.3564213116963706
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,24,1,128,1,float16,float16,0,0.8910293579101562
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,24,8,128,1,float16,fp8,0,2.2099787394205728
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,24,24,128,1,float16,fp8,0,1.2708426316579182
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,24,1,128,1,float16,fp8,0,0.9760639667510986
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,24,8,128,1,float16,fp8,0,4.3000532786051435
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,24,2,128,1,float16,float16,0,0.9429706732432047
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,24,2,128,1,float16,fp8,0,0.9960959752400717
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,24,4,128,1,float16,float16,0,1.0608533223470051
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,24,4,128,1,float16,fp8,0,1.1266559759775798
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,24,8,128,1,float16,float16,0,1.0763359864552815
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,24,8,128,1,float16,fp8,0,1.1413866678873699
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,24,24,128,1,float16,float16,0,0.5840799808502197
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,24,1,128,1,float16,float16,0,0.46434664726257324
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,24,1,128,1,float16,fp8,0,0.4986986716588338
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,24,2,128,1,float16,float16,0,0.49029334386189777
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,24,24,128,1,float16,fp8,0,0.6373120148976644
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,24,2,128,1,float16,fp8,0,0.5109866857528687
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,24,8,128,1,float16,fp8,0,0.5976959864298502
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,24,4,128,1,float16,float16,0,0.5450239976247152
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,24,4,128,1,float16,fp8,0,0.5926293134689331
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,24,8,128,1,float16,float16,0,0.5481173197428385
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,24,24,128,1,float16,float16,0,0.31063999732335407
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,24,24,128,1,float16,fp8,0,0.3447360197703044
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,24,1,128,1,float16,float16,0,0.24963732560475668
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,24,1,128,1,float16,fp8,0,0.2672053376833598
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,24,2,128,1,float16,float16,0,0.2624906698862712
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,24,2,128,1,float16,fp8,0,0.27859199047088623
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,24,4,128,1,float16,float16,0,0.29100267092386883
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,24,4,128,1,float16,fp8,0,0.31813865900039673
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,24,8,128,1,float16,float16,0,0.29517332712809247
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,24,8,128,1,float16,fp8,0,0.32363200187683105
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,24,24,128,1,float16,float16,0,0.1789813240369161
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,24,24,128,1,float16,fp8,0,0.19382399320602417
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,24,1,128,1,float16,float16,0,0.14110400279362997
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,24,1,128,1,float16,fp8,0,0.15027200182278952
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,24,2,128,1,float16,float16,0,0.14496533075968424
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,24,2,128,1,float16,fp8,0,0.15338133772214255
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,24,4,128,1,float16,float16,0,0.16477866967519125
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,24,4,128,1,float16,fp8,0,0.1725226640701294
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,24,8,128,1,float16,float16,0,0.16727999846140543
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,24,8,128,1,float16,fp8,0,0.1818880041440328
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,24,1,128,1,float16,float16,0,3.4844481150309243
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,24,1,128,1,float16,fp8,0,3.6511360804239907
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,24,2,128,1,float16,float16,0,3.676981290181478
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,24,4,128,1,float16,float16,0,4.193509419759114
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,24,2,128,1,float16,fp8,0,3.8170560201009116
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,24,4,128,1,float16,fp8,0,4.568170547485352
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,24,8,128,1,float16,float16,0,4.347434679667155
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,24,24,128,1,float16,float16,0,2.3716959953308105
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,24,8,128,1,float16,fp8,0,4.589834531148274
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,24,1,128,1,float16,float16,0,1.7067359288533528
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,24,24,128,1,float16,fp8,0,2.6421119372049966
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,24,1,128,1,float16,fp8,0,1.8381867408752441
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,24,2,128,1,float16,float16,0,1.817797342936198
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,24,2,128,1,float16,fp8,0,1.993333339691162
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,24,4,128,1,float16,float16,0,2.1506773630777993
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,24,4,128,1,float16,fp8,0,2.284682591756185
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,24,8,128,1,float16,float16,0,2.149328072865804
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,24,24,128,1,float16,float16,0,1.1882719993591309
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,24,8,128,1,float16,fp8,0,2.3557492891947427
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,24,1,128,1,float16,float16,0,0.879701296488444
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,24,24,128,1,float16,fp8,0,1.3148053487141926
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,24,2,128,1,float16,float16,0,0.9365706443786621
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,24,1,128,1,float16,fp8,0,0.947002649307251
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,24,2,128,1,float16,fp8,0,0.984608014424642
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,24,4,128,1,float16,float16,0,1.0760107040405273
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,24,4,128,1,float16,fp8,0,1.1707093715667725
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,24,8,128,1,float16,float16,0,1.095098654429118
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,24,8,128,1,float16,fp8,0,1.1826666990915935
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,24,24,128,1,float16,float16,0,0.6021973292032877
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,24,1,128,1,float16,float16,0,0.4471786816914876
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,24,24,128,1,float16,fp8,0,0.6766239802042643
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,24,1,128,1,float16,fp8,0,0.4848959843317668
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,24,4,128,1,float16,float16,0,0.551471988360087
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,24,2,128,1,float16,float16,0,0.486682653427124
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,24,2,128,1,float16,fp8,0,0.510586659113566
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,24,8,128,1,float16,float16,0,0.557151993115743
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,24,4,128,1,float16,fp8,0,0.6041866540908813
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,24,8,128,1,float16,fp8,0,0.6177226702372233
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,24,24,128,1,float16,float16,0,0.3230133255322774
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,24,24,128,1,float16,fp8,0,0.3551520109176636
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,24,1,128,1,float16,float16,0,0.24188266197840372
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,24,1,128,1,float16,fp8,0,0.26126933097839355
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,24,2,128,1,float16,float16,0,0.2593760093053182
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,24,4,128,1,float16,float16,0,0.29241599639256793
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,24,4,128,1,float16,fp8,0,0.3235413432121277
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,24,2,128,1,float16,fp8,0,0.2727680007616679
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,24,8,128,1,float16,float16,0,0.297541340192159
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,24,8,128,1,float16,fp8,0,0.33313600222269696
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,24,24,128,1,float16,float16,0,0.17563199996948242
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,24,24,128,1,float16,fp8,0,0.1960373322168986
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,24,1,128,1,float16,float16,0,0.13316266735394797
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,24,1,128,1,float16,fp8,0,0.14226133624712625
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,24,2,128,1,float16,float16,0,0.14502933621406555
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,24,2,128,1,float16,fp8,0,0.1532426675160726
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,24,4,128,1,float16,float16,0,0.16005866726239523
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,24,4,128,1,float16,fp8,0,0.1779680053393046
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,24,8,128,1,float16,float16,0,0.16621333360671997
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,24,8,128,1,float16,fp8,0,0.18307733535766602
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,24,24,128,1,float16,float16,0,0.10168000062306722
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,24,24,128,1,float16,fp8,0,0.11365866661071777
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,24,1,128,1,float16,float16,0,0.0739519993464152
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,24,1,128,1,float16,fp8,0,0.08098666866620381
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,24,2,128,1,float16,float16,0,0.07780799766381581
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,24,2,128,1,float16,fp8,0,0.08703466256459554
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,24,4,128,1,float16,float16,0,0.08707732955614726
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,24,4,128,1,float16,fp8,0,0.09661333759625752
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,24,8,128,1,float16,float16,0,0.09388267000516255
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,24,8,128,1,float16,fp8,0,0.09803199768066406
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,24,1,128,1,float16,float16,0,2.1145599683125815
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,24,1,128,1,float16,fp8,0,2.2945119539896646
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,24,2,128,1,float16,float16,0,2.2489013671875
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,24,2,128,1,float16,fp8,0,2.419861316680908
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,24,4,128,1,float16,float16,0,2.698063850402832
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,24,4,128,1,float16,fp8,0,2.9585866928100586
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,24,24,128,1,float16,float16,0,1.5397334098815918
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,24,8,128,1,float16,float16,0,2.702986717224121
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,24,8,128,1,float16,fp8,0,2.9417012532552085
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,24,1,128,1,float16,float16,0,1.0614559650421143
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,24,24,128,1,float16,fp8,0,1.7241867383321126
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,24,1,128,1,float16,fp8,0,1.1754186948140461
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,24,2,128,1,float16,float16,0,1.1469919681549072
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,24,2,128,1,float16,fp8,0,1.2245173454284668
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,24,4,128,1,float16,float16,0,1.3752800623575847
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,24,4,128,1,float16,fp8,0,1.501962661743164
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,24,8,128,1,float16,float16,0,1.3786826133728027
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,24,24,128,1,float16,float16,0,0.7933386961619059
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,24,8,128,1,float16,fp8,0,1.528666655222575
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,24,24,128,1,float16,fp8,0,0.8845173517862955
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,24,1,128,1,float16,float16,0,0.5442773501078287
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,24,1,128,1,float16,fp8,0,0.6002346674601237
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,24,2,128,1,float16,fp8,0,0.6292746861775717
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,24,4,128,1,float16,fp8,0,0.7812853654225668
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,24,4,128,1,float16,float16,0,0.6905653476715088
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,24,8,128,1,float16,float16,0,0.7016639709472656
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,24,2,128,1,float16,float16,0,0.6009813149770101
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,24,24,128,1,float16,float16,0,0.4010666608810425
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,24,8,128,1,float16,fp8,0,0.80294402440389
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,24,24,128,1,float16,fp8,0,0.45657066504160565
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,24,1,128,1,float16,float16,0,0.28778666257858276
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,24,1,128,1,float16,fp8,0,0.3165653347969055
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,24,2,128,1,float16,float16,0,0.3109760085741679
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,24,2,128,1,float16,fp8,0,0.32917867104212445
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,24,4,128,1,float16,float16,0,0.36741332213083905
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,24,4,128,1,float16,fp8,0,0.40540798505147296
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,24,8,128,1,float16,float16,0,0.37035731474558514
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,24,8,128,1,float16,fp8,0,0.41597867012023926
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,24,1,128,1,float16,fp8,0,0.17569599548975626
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,24,24,128,1,float16,float16,0,0.2144533395767212
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,24,24,128,1,float16,fp8,0,0.24489066998163858
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,24,1,128,1,float16,float16,0,0.15710399548212686
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,24,2,128,1,float16,float16,0,0.17140799760818481
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,24,2,128,1,float16,fp8,0,0.18330667416254678
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,24,4,128,1,float16,float16,0,0.19803732633590698
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,24,4,128,1,float16,fp8,0,0.22061866521835327
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,24,8,128,1,float16,float16,0,0.20223466555277506
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,24,8,128,1,float16,fp8,0,0.22362667322158813
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,24,24,128,1,float16,float16,0,0.11926933129628499
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,24,24,128,1,float16,fp8,0,0.13916800419489542
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,24,1,128,1,float16,float16,0,0.0865066647529602
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,24,1,128,1,float16,fp8,0,0.09467732906341553
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,24,2,128,1,float16,float16,0,0.09546132882436116
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,24,2,128,1,float16,fp8,0,0.10193066795667012
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,24,4,128,1,float16,float16,0,0.10813333590825398
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,24,24,128,1,float16,float16,0,0.0718560020128886
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,24,4,128,1,float16,fp8,0,0.11683199803034465
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,24,8,128,1,float16,float16,0,0.11255466938018799
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,24,8,128,1,float16,fp8,0,0.1281013290087382
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,24,24,128,1,float16,fp8,0,0.08082133531570435
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,24,1,128,1,float16,float16,0,0.05406400064627329
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,24,1,128,1,float16,fp8,0,0.06086933116118113
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,24,2,128,1,float16,float16,0,0.05412266651789347
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,24,2,128,1,float16,fp8,0,0.06196799874305725
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,24,8,128,1,float16,fp8,0,0.07073066631952922
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,24,4,128,1,float16,float16,0,0.0603413333495458
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,24,4,128,1,float16,fp8,0,0.06819200019041698
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,24,8,128,1,float16,float16,0,0.06132799883683523
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,24,1,128,1,float16,float16,0,2.2025440533955893
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,24,2,128,1,float16,float16,0,2.4234506289164224
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,24,2,128,1,float16,fp8,0,2.6038079261779785
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,24,1,128,1,float16,fp8,0,2.4621599515279136
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,24,8,128,1,float16,float16,0,3.0556373596191406
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,24,4,128,1,float16,float16,0,3.0083999633789062
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,24,4,128,1,float16,fp8,0,3.364159901936849
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,24,1,128,1,float16,float16,0,1.1216693719228108
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,24,1,128,1,float16,fp8,0,1.247221310933431
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,24,8,128,1,float16,fp8,0,3.316346804300944
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,24,24,128,1,float16,fp8,0,2.0200533866882324
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,24,2,128,1,float16,float16,0,1.2100319862365723
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,24,2,128,1,float16,fp8,0,1.3233333428700764
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,24,4,128,1,float16,float16,0,1.5155092875162761
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,24,24,128,1,float16,float16,0,1.7193493843078613
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,24,24,128,1,float16,float16,0,0.8628426392873129
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,24,8,128,1,float16,float16,0,1.5146133104960124
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,24,4,128,1,float16,fp8,0,1.6999732653299968
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,24,1,128,1,float16,float16,0,0.5742986599604288
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,24,8,128,1,float16,fp8,0,1.75763734181722
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,24,24,128,1,float16,fp8,0,1.0224586327870686
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,24,1,128,1,float16,fp8,0,0.642197330792745
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,24,2,128,1,float16,float16,0,0.6346240043640137
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,24,2,128,1,float16,fp8,0,0.6729119618733724
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,24,4,128,1,float16,float16,0,0.766874631245931
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,24,4,128,1,float16,fp8,0,0.8680426279703776
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,24,8,128,1,float16,float16,0,0.7890933354695638
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,24,24,128,1,float16,float16,0,0.4466186761856079
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,24,8,128,1,float16,fp8,0,0.90447465578715
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,24,24,128,1,float16,fp8,0,0.528218666712443
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,24,1,128,1,float16,float16,0,0.2956106662750244
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,24,1,128,1,float16,fp8,0,0.3316319982210795
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,24,2,128,1,float16,float16,0,0.33164799213409424
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,24,2,128,1,float16,fp8,0,0.3533066511154175
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,24,4,128,1,float16,float16,0,0.40130666891733807
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,24,8,128,1,float16,float16,0,0.405898650487264
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,24,4,128,1,float16,fp8,0,0.45562132199605304
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,24,8,128,1,float16,fp8,0,0.46221331755320233
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,24,24,128,1,float16,float16,0,0.23451733589172363
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,24,1,128,1,float16,float16,0,0.16004799803098044
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,24,24,128,1,float16,fp8,0,0.27542932828267414
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,24,1,128,1,float16,fp8,0,0.18175999323527017
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,24,2,128,1,float16,float16,0,0.17984533309936523
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,24,2,128,1,float16,fp8,0,0.19384533166885376
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,24,4,128,1,float16,float16,0,0.21369600296020508
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,24,4,128,1,float16,fp8,0,0.24203733603159586
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,24,8,128,1,float16,float16,0,0.2132426699002584
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,24,8,128,1,float16,fp8,0,0.24980799357096353
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,24,24,128,1,float16,float16,0,0.12784533699353537
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,24,24,128,1,float16,fp8,0,0.1535360018412272
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,24,1,128,1,float16,float16,0,0.09031466643015544
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,24,1,128,1,float16,fp8,0,0.10062932968139648
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,24,2,128,1,float16,float16,0,0.09962133566538493
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,24,2,128,1,float16,fp8,0,0.10900266965230306
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,24,4,128,1,float16,float16,0,0.1163093348344167
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,24,4,128,1,float16,fp8,0,0.13326932986577353
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,24,8,128,1,float16,float16,0,0.11825600266456604
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,24,8,128,1,float16,fp8,0,0.139082670211792
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,24,24,128,1,float16,float16,0,0.07505066692829132
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,24,24,128,1,float16,fp8,0,0.08906666437784831
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,24,1,128,1,float16,float16,0,0.05093866586685181
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,24,1,128,1,float16,fp8,0,0.05826666454474131
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,24,2,128,1,float16,float16,0,0.05387733379999796
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,24,2,128,1,float16,fp8,0,0.06211199859778086
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,24,24,128,1,float16,float16,0,0.04329599936803182
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,24,4,128,1,float16,float16,0,0.06170133252938589
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,24,4,128,1,float16,fp8,0,0.06955733398596446
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,24,8,128,1,float16,float16,0,0.06725333134333293
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,24,8,128,1,float16,fp8,0,0.07657066484292348
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,24,24,128,1,float16,fp8,0,0.0505973349014918
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,24,1,128,1,float16,float16,0,0.03419733295838038
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,24,1,128,1,float16,fp8,0,0.03909866760174433
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,24,2,128,1,float16,float16,0,0.03570666660865148
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,24,2,128,1,float16,fp8,0,0.040922666589419045
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,24,4,128,1,float16,float16,0,0.039861333866914116
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,24,4,128,1,float16,fp8,0,0.045647998650868736
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,24,8,128,1,float16,float16,0,0.04004266609748205
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,24,8,128,1,float16,fp8,0,0.04643199841181437
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,24,1,128,1,float16,float16,0,1.6361865997314453
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,24,1,128,1,float16,fp8,0,1.8772320747375488
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,24,2,128,1,float16,float16,0,1.8599093755086262
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,24,2,128,1,float16,fp8,0,2.0564746856689453
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,24,4,128,1,float16,float16,0,2.425685405731201
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,24,4,128,1,float16,fp8,0,2.789440155029297
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,24,24,128,1,float16,float16,0,1.4183200200398762
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,24,8,128,1,float16,float16,0,2.4552000363667807
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,24,1,128,1,float16,float16,0,0.8431946436564127
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,24,24,128,1,float16,fp8,0,1.8133440017700195
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,24,8,128,1,float16,fp8,0,2.8900321324666343
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,24,2,128,1,float16,fp8,0,1.033951997756958
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,24,1,128,1,float16,fp8,0,0.9541119734446207
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,24,4,128,1,float16,fp8,0,1.4206239382425945
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,24,24,128,1,float16,fp8,0,0.9121973514556885
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,24,1,128,1,float16,float16,0,0.42765335241953534
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,24,2,128,1,float16,float16,0,0.9272639751434326
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,24,1,128,1,float16,fp8,0,0.4926826556523641
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,24,2,128,1,float16,float16,0,0.49644800027211505
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,24,24,128,1,float16,float16,0,0.7197120189666748
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,24,4,128,1,float16,float16,0,0.6197386582692465
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,24,4,128,1,float16,float16,0,1.2248053550720215
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,24,2,128,1,float16,fp8,0,0.5310293436050415
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,24,8,128,1,float16,fp8,0,1.500688076019287
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,24,8,128,1,float16,float16,0,1.2593066692352295
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,24,1,128,1,float16,float16,0,0.225983997186025
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,24,4,128,1,float16,fp8,0,0.7257013320922852
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,24,24,128,1,float16,float16,0,0.3709546724955241
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,24,8,128,1,float16,float16,0,0.6419093211491903
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,24,24,128,1,float16,fp8,0,0.46723731358846027
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,24,2,128,1,float16,fp8,0,0.2776106595993042
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,24,8,128,1,float16,fp8,0,0.7711146672566732
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,24,4,128,1,float16,float16,0,0.32691200574239093
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,24,4,128,1,float16,fp8,0,0.3808853228886922
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,24,8,128,1,float16,float16,0,0.32860267162323
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,24,24,128,1,float16,float16,0,0.19598400592803955
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,24,8,128,1,float16,fp8,0,0.3949226538340251
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,24,2,128,1,float16,float16,0,0.2579626639684041
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,24,1,128,1,float16,fp8,0,0.2572159965833028
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,24,1,128,1,float16,float16,0,0.12417599558830261
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,24,1,128,1,float16,fp8,0,0.14127467075983682
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,24,24,128,1,float16,fp8,0,0.24565333127975464
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,24,2,128,1,float16,float16,0,0.13769066333770752
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,24,2,128,1,float16,fp8,0,0.15160533785820007
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,24,4,128,1,float16,float16,0,0.16909333070119223
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,24,4,128,1,float16,fp8,0,0.20453866322835287
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,24,8,128,1,float16,float16,0,0.17619200547536215
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,24,8,128,1,float16,fp8,0,0.214954674243927
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,24,24,128,1,float16,float16,0,0.10756799578666687
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,24,24,128,1,float16,fp8,0,0.13499733805656433
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,24,1,128,1,float16,float16,0,0.07056533296902974
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,24,1,128,1,float16,fp8,0,0.07984533409277599
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,24,2,128,1,float16,float16,0,0.07796800136566162
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,24,2,128,1,float16,fp8,0,0.08737599849700928
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,24,4,128,1,float16,float16,0,0.09446932872136433
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,24,4,128,1,float16,fp8,0,0.11213866869608562
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,24,8,128,1,float16,fp8,0,0.11785067121187846
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,24,8,128,1,float16,float16,0,0.09630399942398071
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,24,24,128,1,float16,float16,0,0.06182933350404104
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,24,24,128,1,float16,fp8,0,0.07811200122038524
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,24,1,128,1,float16,float16,0,0.039893334110577904
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,24,4,128,1,float16,fp8,0,0.0582239975531896
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,24,1,128,1,float16,fp8,0,0.04640000065167745
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,24,2,128,1,float16,float16,0,0.04261333247025808
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,24,2,128,1,float16,fp8,0,0.04984533290068308
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,24,4,128,1,float16,float16,0,0.04859733581542969
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,24,8,128,1,float16,float16,0,0.05338666836420695
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,24,8,128,1,float16,fp8,0,0.06218666831652323
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,24,24,128,1,float16,float16,0,0.034671999514102936
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,24,24,128,1,float16,fp8,0,0.043840001026789345
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,24,1,128,1,float16,float16,0,0.02718399961789449
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,24,1,128,1,float16,fp8,0,0.03070399910211563
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,24,2,128,1,float16,float16,0,0.028325334191322327
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,24,2,128,1,float16,fp8,0,0.033759998778502144
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,24,4,128,1,float16,float16,0,0.03073066721359889
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,24,4,128,1,float16,fp8,0,0.038586666186650596
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,24,8,128,1,float16,float16,0,0.030693332354227703
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,24,8,128,1,float16,fp8,0,0.03897066662708918
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,24,24,128,1,float16,float16,0,0.022341333329677582
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,24,24,128,1,float16,fp8,0,0.028165332973003387
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,24,1,128,1,float16,float16,0,0.019472000499566395
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,24,1,128,1,float16,fp8,0,0.02386133372783661
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,24,2,128,1,float16,float16,0,0.019802667200565338
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,24,2,128,1,float16,fp8,0,0.023584000766277313
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,24,4,128,1,float16,float16,0,0.02124800036350886
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,24,4,128,1,float16,fp8,0,0.026746665438016255
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,24,8,128,1,float16,float16,0,0.021482666333516438
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,24,8,128,1,float16,fp8,0,0.02719466636578242
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,24,1,128,1,float16,float16,0,0.694976011912028
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,24,1,128,1,float16,fp8,0,0.8110026518503824
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,24,2,128,1,float16,float16,0,0.7851733366648356
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,24,2,128,1,float16,fp8,0,0.8859840234120687
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,24,4,128,1,float16,float16,0,1.0823840300242107
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,24,4,128,1,float16,fp8,0,1.283077319463094
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,24,24,128,1,float16,float16,0,0.6516693433125814
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,24,24,128,1,float16,fp8,0,0.7939519882202148
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,24,1,128,1,float16,float16,0,0.35489598910013836
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,24,1,128,1,float16,fp8,0,0.4180479844411214
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,24,8,128,1,float16,fp8,0,1.3478933970133464
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,24,8,128,1,float16,float16,0,1.1203680038452148
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,24,2,128,1,float16,float16,0,0.4256480137507121
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,24,2,128,1,float16,fp8,0,0.4571839968363444
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,24,4,128,1,float16,float16,0,0.5497546593348185
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,24,4,128,1,float16,fp8,0,0.6553546587626139
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,24,8,128,1,float16,float16,0,0.568832000096639
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,24,24,128,1,float16,fp8,0,0.4102933406829834
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,24,1,128,1,float16,float16,0,0.18968532482783
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,24,1,128,1,float16,fp8,0,0.2214453419049581
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,24,8,128,1,float16,fp8,0,0.6939093271891276
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,24,2,128,1,float16,float16,0,0.21997867027918497
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,24,2,128,1,float16,fp8,0,0.24253867069880167
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,24,24,128,1,float16,float16,0,0.336575984954834
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,24,4,128,1,float16,float16,0,0.28599466880162555
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,24,8,128,1,float16,float16,0,0.28995199998219806
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,24,4,128,1,float16,fp8,0,0.34222400188446045
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,24,8,128,1,float16,fp8,0,0.3577333291371663
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,24,24,128,1,float16,float16,0,0.17761067549387613
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,24,24,128,1,float16,fp8,0,0.21541333198547363
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,24,1,128,1,float16,float16,0,0.104010671377182
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,24,1,128,1,float16,fp8,0,0.12232533097267151
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,24,2,128,1,float16,float16,0,0.11936533451080322
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,24,2,128,1,float16,fp8,0,0.1320853332678477
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,24,8,128,1,float16,fp8,0,0.19425066312154135
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,24,4,128,1,float16,float16,0,0.1513653298219045
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,24,4,128,1,float16,fp8,0,0.18245333433151245
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,24,24,128,1,float16,float16,0,0.09840533137321472
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,24,24,128,1,float16,fp8,0,0.11965866883595784
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,24,1,128,1,float16,float16,0,0.06009600063165029
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,24,4,128,1,float16,float16,0,0.08349866668383281
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,24,1,128,1,float16,fp8,0,0.0680266668399175
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,24,2,128,1,float16,float16,0,0.06709333260854085
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,24,8,128,1,float16,float16,0,0.1581493318080902
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,24,2,128,1,float16,fp8,0,0.07610133290290833
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,24,4,128,1,float16,fp8,0,0.10187733173370361
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,24,8,128,1,float16,float16,0,0.08597333232561748
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,24,8,128,1,float16,fp8,0,0.10789866248766582
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,24,24,128,1,float16,float16,0,0.055488000313440956
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,24,24,128,1,float16,fp8,0,0.06824000179767609
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,24,1,128,1,float16,float16,0,0.032058666149775185
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,24,1,128,1,float16,fp8,0,0.03877866764863332
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,24,2,128,1,float16,float16,0,0.03507733345031738
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,24,2,128,1,float16,fp8,0,0.042064001162846885
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,24,4,128,1,float16,float16,0,0.040634666879971824
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,24,4,128,1,float16,fp8,0,0.05231999854246775
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,24,8,128,1,float16,float16,0,0.04696000119050344
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,24,8,128,1,float16,fp8,0,0.0544053316116333
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,24,2,128,1,float16,float16,0,0.023728000621000927
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,24,24,128,1,float16,float16,0,0.02994133283694585
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,24,24,128,1,float16,fp8,0,0.03822933385769526
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,24,1,128,1,float16,float16,0,0.0227360005180041
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,24,1,128,1,float16,fp8,0,0.02735999971628189
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,24,2,128,1,float16,fp8,0,0.02844800055027008
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,24,4,128,1,float16,float16,0,0.026602665583292644
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,24,4,128,1,float16,fp8,0,0.03251733382542928
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,24,8,128,1,float16,float16,0,0.02685333291689555
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,24,8,128,1,float16,fp8,0,0.03316266586383184
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,24,24,128,1,float16,float16,0,0.019519999623298645
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,24,24,128,1,float16,fp8,0,0.025055999557177227
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,24,1,128,1,float16,float16,0,0.01720533271630605
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,24,1,128,1,float16,fp8,0,0.01979200045267741
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,24,2,128,1,float16,float16,0,0.017375999440749485
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,24,2,128,1,float16,fp8,0,0.019600000232458115
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,24,4,128,1,float16,float16,0,0.018538666268189747
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,24,4,128,1,float16,fp8,0,0.022895999252796173
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,24,8,128,1,float16,float16,0,0.018613333503405254
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,24,8,128,1,float16,fp8,0,0.023130667706330616
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,24,24,128,1,float16,float16,0,0.01716800034046173
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,24,24,128,1,float16,fp8,0,0.02295999974012375
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,24,1,128,1,float16,float16,0,0.016501333564519882
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,24,1,128,1,float16,fp8,0,0.019189332922299702
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,24,2,128,1,float16,float16,0,0.016586666305859882
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,24,2,128,1,float16,fp8,0,0.01940800001223882
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,24,4,128,1,float16,float16,0,0.016832000265518825
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,24,4,128,1,float16,fp8,0,0.021231998999913532
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,24,8,128,1,float16,fp8,0,0.02091199904680252
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,24,8,128,1,float16,float16,0,0.01695999999841054
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,24,1,128,1,float16,float16,0,0.3326293428738912
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,24,1,128,1,float16,fp8,0,0.3729066848754883
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,24,2,128,1,float16,float16,0,0.4001760085423787
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,24,24,128,1,float16,float16,0,0.31933865944544476
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,24,2,128,1,float16,fp8,0,0.4096693197886149
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,24,24,128,1,float16,fp8,0,0.3677226702372233
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,24,4,128,1,float16,float16,0,0.523306647936503
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,24,4,128,1,float16,fp8,0,0.5950560172398885
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,24,1,128,1,float16,float16,0,0.17882666985193887
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,24,8,128,1,float16,float16,0,0.5410720109939575
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,24,8,128,1,float16,fp8,0,0.6116533279418945
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,24,2,128,1,float16,float16,0,0.20922134319941202
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,24,1,128,1,float16,fp8,0,0.19764800866444907
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,24,8,128,1,float16,float16,0,0.27956799666086835
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,24,4,128,1,float16,float16,0,0.2744906743367513
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,24,4,128,1,float16,fp8,0,0.3202880024909973
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,24,24,128,1,float16,float16,0,0.1693120002746582
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,24,2,128,1,float16,fp8,0,0.22330133120218912
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,24,8,128,1,float16,fp8,0,0.3208106756210327
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,24,1,128,1,float16,float16,0,0.10136000315348308
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,24,1,128,1,float16,fp8,0,0.11211733023325603
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,24,24,128,1,float16,fp8,0,0.19309866428375244
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,24,2,128,1,float16,float16,0,0.11702932914098103
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,24,4,128,1,float16,float16,0,0.14773333072662354
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,24,4,128,1,float16,fp8,0,0.1704960068066915
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,24,2,128,1,float16,fp8,0,0.12257599830627441
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,24,8,128,1,float16,float16,0,0.15194666385650635
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,24,8,128,1,float16,fp8,0,0.17602133750915527
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,24,1,128,1,float16,fp8,0,0.061018665631612144
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,24,24,128,1,float16,float16,0,0.0934826632340749
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,24,24,128,1,float16,fp8,0,0.1050879955291748
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,24,1,128,1,float16,float16,0,0.05712000032265981
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,24,2,128,1,float16,float16,0,0.0646613339583079
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,24,2,128,1,float16,fp8,0,0.06729599833488464
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,24,4,128,1,float16,float16,0,0.08150933186213176
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,24,4,128,1,float16,fp8,0,0.09256533781687419
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,24,8,128,1,float16,float16,0,0.08397333820660909
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,24,8,128,1,float16,fp8,0,0.09480533003807068
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,24,24,128,1,float16,float16,0,0.05365866422653198
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,24,24,128,1,float16,fp8,0,0.05899733304977417
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,24,1,128,1,float16,float16,0,0.028389332195123036
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,24,1,128,1,float16,fp8,0,0.033146666983763375
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,24,2,128,1,float16,float16,0,0.030954666435718536
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,24,2,128,1,float16,fp8,0,0.03603733330965042
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,24,4,128,1,float16,float16,0,0.03738666574160258
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,24,4,128,1,float16,fp8,0,0.04277333120505015
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,24,8,128,1,float16,float16,0,0.041722665230433144
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,24,1,128,1,float16,float16,0,0.02111999938885371
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,24,8,128,1,float16,fp8,0,0.04580800235271454
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,24,24,128,1,float16,float16,0,0.028592000404993694
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,24,24,128,1,float16,fp8,0,0.03139200061559677
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,24,1,128,1,float16,fp8,0,0.024351999163627625
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,24,2,128,1,float16,float16,0,0.022346665461858112
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,24,4,128,1,float16,float16,0,0.02478400121132533
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,24,4,128,1,float16,fp8,0,0.029264000554879505
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,24,2,128,1,float16,fp8,0,0.025744001070658367
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,24,8,128,1,float16,float16,0,0.02513599892457326
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,24,8,128,1,float16,fp8,0,0.029215998947620392
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,24,2,128,1,float16,float16,0,0.015647999942302704
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,24,2,128,1,float16,fp8,0,0.018405333161354065
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,24,24,128,1,float16,float16,0,0.017877332866191864
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,24,24,128,1,float16,fp8,0,0.02045866722861926
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,24,1,128,1,float16,float16,0,0.015461333096027374
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,24,1,128,1,float16,fp8,0,0.018160000443458557
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,24,4,128,1,float16,float16,0,0.01699200024207433
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,24,4,128,1,float16,fp8,0,0.01982933282852173
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,24,8,128,1,float16,float16,0,0.017125333348910015
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,24,8,128,1,float16,fp8,0,0.019866666446129482
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,24,24,128,1,float16,float16,0,0.015594666202863058
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,24,24,128,1,float16,fp8,0,0.01852799952030182
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,24,1,128,1,float16,float16,0,0.015216000378131866
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,24,1,128,1,float16,fp8,0,0.01807466646035512
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,24,2,128,1,float16,float16,0,0.014746667196353277
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,24,2,128,1,float16,fp8,0,0.017525333911180496
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,24,4,128,1,float16,float16,0,0.015226667126019796
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,24,4,128,1,float16,fp8,0,0.017594666530688603
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,24,8,128,1,float16,float16,0,0.015573333948850632
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,24,8,128,1,float16,fp8,0,0.018239999810854595
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,24,24,128,1,float16,float16,0,0.015024000157912573
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,24,24,128,1,float16,fp8,0,0.017605333278576534
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,24,1,128,1,float16,float16,0,0.014767999450365702
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,24,1,128,1,float16,fp8,0,0.01728533332546552
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,24,2,128,1,float16,float16,0,0.014885333677132925
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,24,2,128,1,float16,fp8,0,0.017237332959969837
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,24,4,128,1,float16,float16,0,0.014864000181357065
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,24,4,128,1,float16,fp8,0,0.017514667163292568
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,24,8,128,1,float16,float16,0,0.014933332800865173
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,24,8,128,1,float16,fp8,0,0.017557332913080852
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,24,1,128,1,float16,float16,0,0.17822933197021484
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,24,1,128,1,float16,fp8,0,0.20074133078257242
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,24,2,128,1,float16,float16,0,0.20692267020543417
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,24,2,128,1,float16,fp8,0,0.22074667612711588
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,24,4,128,1,float16,float16,0,0.2757493257522583
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,24,4,128,1,float16,fp8,0,0.32018667459487915
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,24,24,128,1,float16,float16,0,0.18805867433547974
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,24,24,128,1,float16,fp8,0,0.1894986629486084
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,24,8,128,1,float16,float16,0,0.31114667654037476
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,24,1,128,1,float16,float16,0,0.09943999846776326
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,24,8,128,1,float16,fp8,0,0.3537546793619792
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,24,2,128,1,float16,float16,0,0.1149013340473175
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,24,1,128,1,float16,fp8,0,0.11171733339627583
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,24,2,128,1,float16,fp8,0,0.12298666437466939
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,24,4,128,1,float16,float16,0,0.14787733554840088
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,24,4,128,1,float16,fp8,0,0.16884799798329672
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,24,24,128,1,float16,fp8,0,0.10382933417956035
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,24,8,128,1,float16,float16,0,0.1673120061556498
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,24,8,128,1,float16,fp8,0,0.19679999351501465
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,24,24,128,1,float16,float16,0,0.10125333070755005
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,24,1,128,1,float16,float16,0,0.05532266696294149
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,24,1,128,1,float16,fp8,0,0.06048533320426941
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,24,2,128,1,float16,float16,0,0.06558399895826976
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,24,2,128,1,float16,fp8,0,0.06943466762701671
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,24,4,128,1,float16,float16,0,0.08136000235875447
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,24,4,128,1,float16,fp8,0,0.09141866366068523
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,24,8,128,1,float16,float16,0,0.09239466985066731
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,24,8,128,1,float16,fp8,0,0.10752000411351521
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,24,24,128,1,float16,float16,0,0.05786666770776113
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,24,24,128,1,float16,fp8,0,0.05634133517742157
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,24,1,128,1,float16,float16,0,0.028309332827727
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,24,1,128,1,float16,fp8,0,0.03316266586383184
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,24,4,128,1,float16,float16,0,0.03716800113519033
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,24,2,128,1,float16,float16,0,0.030581332743167877
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,24,2,128,1,float16,fp8,0,0.03606399893760681
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,24,4,128,1,float16,fp8,0,0.04260799785455068
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,24,8,128,1,float16,float16,0,0.04489600161711375
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,24,8,128,1,float16,fp8,0,0.05035733183224996
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,24,24,128,1,float16,float16,0,0.02922666569550832
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,24,24,128,1,float16,fp8,0,0.02829866607983907
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,24,1,128,1,float16,float16,0,0.0207893339296182
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,24,1,128,1,float16,fp8,0,0.024319998919963837
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,24,2,128,1,float16,float16,0,0.021738665799299877
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,24,2,128,1,float16,fp8,0,0.025775998830795288
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,24,4,128,1,float16,float16,0,0.02459733436505
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,24,4,128,1,float16,fp8,0,0.029109333952267964
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,24,8,128,1,float16,float16,0,0.02496533344189326
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,24,8,128,1,float16,fp8,0,0.02918400118748347
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,24,24,128,1,float16,float16,0,0.019205333044131596
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,24,24,128,1,float16,fp8,0,0.019786667078733444
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,24,1,128,1,float16,float16,0,0.015466666469971338
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,24,1,128,1,float16,fp8,0,0.018031999468803406
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,24,2,128,1,float16,float16,0,0.015706667055686314
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,24,2,128,1,float16,fp8,0,0.0183146670460701
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,24,4,128,1,float16,float16,0,0.01682666689157486
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,24,4,128,1,float16,fp8,0,0.0198186660806338
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,24,8,128,1,float16,float16,0,0.016869333883126576
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,24,8,128,1,float16,fp8,0,0.019941333681344986
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,24,24,128,1,float16,float16,0,0.013760000467300415
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,24,24,128,1,float16,fp8,0,0.014757333944241205
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,24,1,128,1,float16,float16,0,0.014837333311637243
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,24,1,128,1,float16,fp8,0,0.017093333105246227
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,24,2,128,1,float16,float16,0,0.014975999792416891
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,24,2,128,1,float16,fp8,0,0.01736533393462499
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,24,4,128,1,float16,float16,0,0.01544533297419548
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,24,4,128,1,float16,fp8,0,0.017808000246683758
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,24,8,128,1,float16,float16,0,0.015200000256299973
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,24,1,128,1,float16,fp8,0,0.017386666188637417
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,24,8,128,1,float16,fp8,0,0.018181333939234417
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,24,24,128,1,float16,fp8,0,0.014005333185195923
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,24,24,128,1,float16,float16,0,0.01313599944114685
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,24,1,128,1,float16,float16,0,0.01462399959564209
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,24,2,128,1,float16,float16,0,0.01470400020480156
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,24,24,128,1,float16,float16,0,0.012794667234023413
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,24,2,128,1,float16,fp8,0,0.017429333180189133
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,24,4,128,1,float16,float16,0,0.014885333677132925
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,24,4,128,1,float16,fp8,0,0.01727466657757759
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,24,8,128,1,float16,float16,0,0.014752000570297241
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,24,8,128,1,float16,fp8,0,0.01717866708834966
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,24,24,128,1,float16,fp8,0,0.013493333011865616
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,24,1,128,1,float16,float16,0,0.014165333161751429
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,24,1,128,1,float16,fp8,0,0.016341333587964375
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,24,8,128,1,float16,fp8,0,0.016282666474580765
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,24,2,128,1,float16,float16,0,0.014159999787807465
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,24,2,128,1,float16,fp8,0,0.016255999604860943
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,24,1,128,1,float16,fp8,0,0.11197866996129353
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,24,4,128,1,float16,float16,0,0.013967999567588171
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,24,4,128,1,float16,fp8,0,0.016336000214020412
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,24,8,128,1,float16,float16,0,0.01404800017674764
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,24,1,128,1,float16,float16,0,0.09875733653704326
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,24,2,128,1,float16,float16,0,0.11382933457692464
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,24,2,128,1,float16,fp8,0,0.12212266524632771
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,24,4,128,1,float16,float16,0,0.16029333074887595
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,24,4,128,1,float16,fp8,0,0.19070400794347128
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,24,24,128,1,float16,float16,0,0.13860266407330832
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,24,8,128,1,float16,float16,0,0.16729066769282022
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,24,24,128,1,float16,fp8,0,0.13894400000572205
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,24,8,128,1,float16,fp8,0,0.19473600387573242
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,24,1,128,1,float16,float16,0,0.05670933425426483
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,24,1,128,1,float16,fp8,0,0.05841066439946493
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,24,2,128,1,float16,float16,0,0.0647680014371872
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,24,2,128,1,float16,fp8,0,0.06799999872843425
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,24,4,128,1,float16,float16,0,0.08966400225957234
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,24,8,128,1,float16,float16,0,0.09362133344014485
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,24,4,128,1,float16,fp8,0,0.10327999790509541
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,24,8,128,1,float16,fp8,0,0.1067680021127065
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,24,24,128,1,float16,float16,0,0.0765226682027181
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,24,2,128,1,float16,float16,0,0.030613332986831665
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,24,24,128,1,float16,fp8,0,0.07584000130494435
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,24,1,128,1,float16,float16,0,0.02807466685771942
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,24,1,128,1,float16,fp8,0,0.03310933212439219
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,24,2,128,1,float16,fp8,0,0.03606933355331421
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,24,8,128,1,float16,float16,0,0.04699199895064036
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,24,4,128,1,float16,float16,0,0.04159466673930486
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,24,4,128,1,float16,fp8,0,0.048623998959859215
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,24,8,128,1,float16,fp8,0,0.04947733382383982
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,24,24,128,1,float16,float16,0,0.03889599939187368
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,24,24,128,1,float16,fp8,0,0.03749866783618927
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,24,1,128,1,float16,float16,0,0.020794666061798733
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,24,4,128,1,float16,fp8,0,0.029088000456492107
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,24,1,128,1,float16,fp8,0,0.02418133368094762
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,24,2,128,1,float16,float16,0,0.022074667116006214
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,24,2,128,1,float16,fp8,0,0.025653332471847534
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,24,4,128,1,float16,float16,0,0.024933333198229473
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,24,1,128,1,float16,float16,0,0.015392000476519266
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,24,8,128,1,float16,float16,0,0.024522667129834492
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,24,8,128,1,float16,fp8,0,0.02940800040960312
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,24,24,128,1,float16,float16,0,0.02269333352645238
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,24,2,128,1,float16,fp8,0,0.01811733345190684
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,24,24,128,1,float16,fp8,0,0.023269332945346832
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,24,1,128,1,float16,fp8,0,0.018005333840847015
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,24,2,128,1,float16,float16,0,0.015072000523408255
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,24,4,128,1,float16,float16,0,0.017045332739750545
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,24,4,128,1,float16,fp8,0,0.01971199984351794
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,24,8,128,1,float16,float16,0,0.01666133354107539
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,24,8,128,1,float16,fp8,0,0.019813333948453266
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,24,24,128,1,float16,float16,0,0.01706133286158244
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,24,24,128,1,float16,fp8,0,0.017802666872739792
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,24,1,128,1,float16,float16,0,0.014490666488806406
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,24,1,128,1,float16,fp8,0,0.01746133342385292
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,24,2,128,1,float16,float16,0,0.015306666493415833
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,24,2,128,1,float16,fp8,0,0.017818666994571686
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,24,4,128,1,float16,float16,0,0.015279999623696009
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,24,4,128,1,float16,fp8,0,0.0174346665541331
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,24,8,128,1,float16,float16,0,0.014981333166360855
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,24,8,128,1,float16,fp8,0,0.017759999881188076
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,24,24,128,1,float16,float16,0,0.013408000270525614
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,24,24,128,1,float16,fp8,0,0.014090667168299357
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,24,1,128,1,float16,float16,0,0.01469333345691363
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,24,1,128,1,float16,fp8,0,0.01730666682124138
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,24,2,128,1,float16,float16,0,0.014639999717473984
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,24,2,128,1,float16,fp8,0,0.017279999951521557
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,24,4,128,1,float16,float16,0,0.014997333288192749
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,24,4,128,1,float16,fp8,0,0.017242666333913803
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,24,8,128,1,float16,float16,0,0.014869333555301031
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,24,8,128,1,float16,fp8,0,0.0176959993938605
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,24,24,128,1,float16,float16,0,0.013210666676362356
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,24,24,128,1,float16,fp8,0,0.013381333400805792
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,24,1,128,1,float16,float16,0,0.01421333352724711
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,24,1,128,1,float16,fp8,0,0.016058667252461117
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,24,2,128,1,float16,float16,0,0.013994666437307993
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,24,2,128,1,float16,fp8,0,0.01618133361140887
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,24,4,128,1,float16,float16,0,0.014245333770910898
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,24,4,128,1,float16,fp8,0,0.01623999948302905
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,24,8,128,1,float16,float16,0,0.014352000008026758
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,24,8,128,1,float16,fp8,0,0.01703466723362605
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,24,24,128,1,float16,float16,0,0.012762666990359625
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,24,24,128,1,float16,fp8,0,0.013189333180586496
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,24,1,128,1,float16,float16,0,0.013839999834696451
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,24,2,128,1,float16,float16,0,0.0136266661187013
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,24,1,128,1,float16,fp8,0,0.01609066625436147
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,24,2,128,1,float16,fp8,0,0.01602666700879733
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,24,4,128,1,float16,float16,0,0.013989333063364029
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,24,4,128,1,float16,fp8,0,0.016063999384641647
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,24,8,128,1,float16,float16,0,0.013514666507641474
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,24,8,128,1,float16,fp8,0,0.015802666544914246
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,24,1,128,1,float16,float16,0,0.05615466833114624
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,24,1,128,1,float16,fp8,0,0.059562668204307556
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,24,2,128,1,float16,fp8,0,0.07961066563924153
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,24,2,128,1,float16,float16,0,0.07347199817498525
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,24,4,128,1,float16,float16,0,0.09005866448084514
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,24,4,128,1,float16,fp8,0,0.10408000151316325
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,24,24,128,1,float16,float16,0,0.11505599816640218
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,24,8,128,1,float16,float16,0,0.10296000043551128
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,24,24,128,1,float16,fp8,0,0.1129973332087199
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,24,8,128,1,float16,fp8,0,0.10528000195821126
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,24,1,128,1,float16,float16,0,0.02842666705449422
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,24,1,128,1,float16,fp8,0,0.03313600023587545
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,24,2,128,1,float16,float16,0,0.0354720006386439
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,24,2,128,1,float16,fp8,0,0.041450666884581246
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,24,4,128,1,float16,float16,0,0.042080000042915344
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,24,4,128,1,float16,fp8,0,0.04824000100294749
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,24,8,128,1,float16,float16,0,0.053898667295773826
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,24,24,128,1,float16,float16,0,0.05875200033187866
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,24,8,128,1,float16,fp8,0,0.0497920016447703
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,24,24,128,1,float16,fp8,0,0.05529599885145823
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,24,1,128,1,float16,float16,0,0.020938667158285778
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,24,1,128,1,float16,fp8,0,0.02430933217207591
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,24,2,128,1,float16,float16,0,0.022266666094462078
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,24,2,128,1,float16,fp8,0,0.02573866645495097
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,24,4,128,1,float16,float16,0,0.02441066751877467
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,24,24,128,1,float16,fp8,0,0.032672000428040825
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,24,4,128,1,float16,fp8,0,0.029120000700155895
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,24,8,128,1,float16,float16,0,0.028005334238211315
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,24,8,128,1,float16,fp8,0,0.029472000896930695
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,24,24,128,1,float16,float16,0,0.03201599915822347
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,24,1,128,1,float16,float16,0,0.01523200049996376
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,24,1,128,1,float16,fp8,0,0.017845333864291508
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,24,2,128,1,float16,float16,0,0.015450666348139444
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,24,2,128,1,float16,fp8,0,0.018181333939234417
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,24,4,128,1,float16,float16,0,0.016554666062196095
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,24,4,128,1,float16,fp8,0,0.01960533360640208
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,24,8,128,1,float16,float16,0,0.01865600049495697
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,24,8,128,1,float16,fp8,0,0.01916266605257988
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,24,2,128,1,float16,float16,0,0.014869333555301031
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,24,24,128,1,float16,float16,0,0.020917333662509918
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,24,24,128,1,float16,fp8,0,0.02124800036350886
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,24,1,128,1,float16,float16,0,0.014965333044528961
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,24,1,128,1,float16,fp8,0,0.01783466711640358
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,24,2,128,1,float16,fp8,0,0.01730666682124138
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,24,4,128,1,float16,float16,0,0.015135999768972397
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,24,4,128,1,float16,fp8,0,0.017557332913080852
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,24,8,128,1,float16,float16,0,0.013471999516089758
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,24,8,128,1,float16,fp8,0,0.014127999544143677
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,24,24,128,1,float16,float16,0,0.016437333077192307
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,24,2,128,1,float16,fp8,0,0.017184000462293625
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,24,24,128,1,float16,fp8,0,0.017136000096797943
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,24,1,128,1,float16,float16,0,0.01463466634353002
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,24,1,128,1,float16,fp8,0,0.01748266691962878
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,24,2,128,1,float16,float16,0,0.014666666587193808
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,24,4,128,1,float16,float16,0,0.014453332871198654
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,24,4,128,1,float16,fp8,0,0.017050666113694508
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,24,8,128,1,float16,float16,0,0.012624000509579977
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,24,8,128,1,float16,fp8,0,0.013440000514189402
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,24,24,128,1,float16,float16,0,0.012698666503032049
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,24,24,128,1,float16,fp8,0,0.013738666971524557
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,24,1,128,1,float16,float16,0,0.013749333719412485
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,24,1,128,1,float16,fp8,0,0.016629333297411602
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,24,2,128,1,float16,float16,0,0.013989333063364029
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,24,2,128,1,float16,fp8,0,0.015941333025693893
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,24,4,128,1,float16,float16,0,0.013631999492645264
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,24,4,128,1,float16,fp8,0,0.01607999950647354
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,24,8,128,1,float16,float16,0,0.012815999488035837
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,24,8,128,1,float16,fp8,0,0.013354666531085968
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,24,24,128,1,float16,float16,0,0.012821332861979803
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,24,24,128,1,float16,fp8,0,0.013914667069911957
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,24,1,128,1,float16,float16,0,0.013642666240533194
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,24,1,128,1,float16,fp8,0,0.015813333292802174
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,24,8,128,1,float16,float16,0,0.012047999848922094
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,24,2,128,1,float16,float16,0,0.013877333452304205
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,24,2,128,1,float16,fp8,0,0.01588800052801768
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,24,4,128,1,float16,float16,0,0.013477332890033722
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,24,4,128,1,float16,fp8,0,0.015722667177518208
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,24,8,128,1,float16,fp8,0,0.012549333274364471
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,24,24,128,1,float16,float16,0,0.01246400053302447
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,24,24,128,1,float16,fp8,0,0.013141332815090815
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,24,1,128,1,float16,float16,0,0.01351999988158544
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,24,1,128,1,float16,fp8,0,0.01589866727590561
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,24,2,128,1,float16,float16,0,0.01379199946920077
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,24,2,128,1,float16,fp8,0,0.015749332805474598
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,24,4,128,1,float16,float16,0,0.013141332815090815
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,24,4,128,1,float16,fp8,0,0.015439999600251516
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,24,8,128,1,float16,float16,0,0.011727999895811081
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,24,8,128,1,float16,fp8,0,0.012458667159080505
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,16,1,128,1,float16,fp8,0,13.939771016438803
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,16,1,128,1,float16,float16,0,15.826085408528646
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,16,2,128,1,float16,fp8,0,14.145818074544271
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,16,2,128,1,float16,float16,0,15.726783752441406
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,16,4,128,1,float16,fp8,0,14.812911987304688
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,16,4,128,1,float16,float16,0,16.561461130777996
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,16,8,128,1,float16,float16,0,16.63598378499349
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,16,8,128,1,float16,fp8,0,14.840836842854818
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,16,16,128,1,float16,float16,0,8.337402979532877
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,16,1,128,1,float16,float16,0,7.496085484822591
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,16,1,128,1,float16,fp8,0,7.235322952270508
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,16,16,128,1,float16,fp8,0,7.958943684895833
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,16,2,128,1,float16,float16,0,7.580965042114258
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,16,2,128,1,float16,fp8,0,7.263359705607097
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,16,4,128,1,float16,float16,0,8.177248001098633
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,16,16,128,1,float16,float16,0,3.903200149536133
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,16,16,128,1,float16,fp8,0,3.8960746129353843
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,16,1,128,1,float16,float16,0,3.798341433207194
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,16,4,128,1,float16,fp8,0,7.45193608601888
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,16,1,128,1,float16,fp8,0,3.6034825642903647
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,16,8,128,1,float16,float16,0,8.16862932840983
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,16,2,128,1,float16,float16,0,3.7600533167521157
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,16,8,128,1,float16,fp8,0,7.7447357177734375
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,16,2,128,1,float16,fp8,0,3.802469253540039
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,16,4,128,1,float16,float16,0,3.8320960998535156
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,16,16,128,1,float16,float16,0,2.035194714864095
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,16,4,128,1,float16,fp8,0,4.388517379760742
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,16,1,128,1,float16,float16,0,1.8569547335306804
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,16,8,128,1,float16,float16,0,3.818357467651367
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,16,16,128,1,float16,fp8,0,2.1988320350646973
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,16,8,128,1,float16,fp8,0,3.8492374420166016
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,16,1,128,1,float16,fp8,0,1.8709440231323242
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,16,2,128,1,float16,float16,0,1.845461368560791
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,16,2,128,1,float16,fp8,0,1.998960018157959
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,16,4,128,1,float16,float16,0,1.9468000729878743
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,16,4,128,1,float16,fp8,0,2.024160067240397
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,16,8,128,1,float16,float16,0,1.949562708536784
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,16,8,128,1,float16,fp8,0,2.012330691019694
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,16,1,128,1,float16,fp8,0,8.094522476196289
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,16,1,128,1,float16,float16,0,9.06172243754069
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,16,2,128,1,float16,float16,0,8.681680043538412
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,16,2,128,1,float16,fp8,0,8.23914655049642
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,16,4,128,1,float16,float16,0,9.355648040771484
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,16,4,128,1,float16,fp8,0,8.725685119628906
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,16,8,128,1,float16,fp8,0,8.723941167195639
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,16,8,128,1,float16,float16,0,9.498746871948242
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,16,16,128,1,float16,float16,0,4.591408093770345
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,16,1,128,1,float16,float16,0,4.232042630513509
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,16,16,128,1,float16,fp8,0,4.651274681091309
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,16,1,128,1,float16,fp8,0,4.100063959757487
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,16,2,128,1,float16,float16,0,4.333893458048503
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,16,2,128,1,float16,fp8,0,4.17192014058431
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,16,4,128,1,float16,float16,0,4.553754806518555
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,16,4,128,1,float16,fp8,0,4.481738726298015
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,16,16,128,1,float16,float16,0,2.304837385813395
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,16,16,128,1,float16,fp8,0,2.33842134475708
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,16,1,128,1,float16,float16,0,2.149376074473063
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,16,1,128,1,float16,fp8,0,2.1002346674601235
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,16,8,128,1,float16,float16,0,4.641039848327637
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,16,2,128,1,float16,float16,0,2.084890683492025
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,16,8,128,1,float16,fp8,0,4.577797253926595
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,16,2,128,1,float16,fp8,0,2.1382880210876465
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,16,4,128,1,float16,float16,0,2.2798666954040527
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,16,4,128,1,float16,fp8,0,2.2828853925069175
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,16,16,128,1,float16,float16,0,1.2624586423238118
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,16,8,128,1,float16,float16,0,2.2652586301167807
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,16,8,128,1,float16,fp8,0,2.284880002339681
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,16,16,128,1,float16,fp8,0,1.2397493521372478
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,16,1,128,1,float16,float16,0,1.0713386535644531
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,16,1,128,1,float16,fp8,0,1.1637760003407795
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,16,2,128,1,float16,float16,0,1.051514705022176
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,16,2,128,1,float16,fp8,0,1.1660160223642986
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,16,4,128,1,float16,float16,0,1.1528373559315999
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,16,4,128,1,float16,fp8,0,1.1902080376942952
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,16,8,128,1,float16,float16,0,1.1656266848246257
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,16,8,128,1,float16,fp8,0,1.2047839959462483
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,16,1,128,1,float16,float16,0,6.217690785725911
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,16,2,128,1,float16,float16,0,5.708970387776692
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,16,1,128,1,float16,fp8,0,5.682853062947591
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,16,2,128,1,float16,fp8,0,5.812154769897461
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,16,4,128,1,float16,float16,0,6.534218470255534
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,16,4,128,1,float16,fp8,0,6.290346781412761
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,16,8,128,1,float16,float16,0,6.59495480855306
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,16,16,128,1,float16,float16,0,3.3045972188313804
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,16,1,128,1,float16,float16,0,2.8830668131510415
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,16,16,128,1,float16,fp8,0,3.232757250467936
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,16,2,128,1,float16,float16,0,2.8540000915527344
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,16,1,128,1,float16,fp8,0,3.0025227864583335
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,16,8,128,1,float16,fp8,0,6.327242533365886
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,16,4,128,1,float16,float16,0,3.2085653940836587
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,16,2,128,1,float16,fp8,0,2.962106704711914
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,16,16,128,1,float16,float16,0,1.683616002400716
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,16,4,128,1,float16,fp8,0,3.3262507120768228
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,16,8,128,1,float16,float16,0,3.1901706059773765
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,16,16,128,1,float16,fp8,0,1.696714719136556
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,16,8,128,1,float16,fp8,0,3.1648054122924805
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,16,1,128,1,float16,float16,0,1.4705546696980794
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,16,1,128,1,float16,fp8,0,1.5766560236612956
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,16,2,128,1,float16,float16,0,1.4810560544331868
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,16,2,128,1,float16,fp8,0,1.5739092826843262
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,16,4,128,1,float16,float16,0,1.601029396057129
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,16,4,128,1,float16,fp8,0,1.8500852584838867
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,16,8,128,1,float16,float16,0,1.6110399564107258
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,16,16,128,1,float16,float16,0,0.8695253531138102
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,16,8,128,1,float16,fp8,0,1.6407626469930012
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,16,16,128,1,float16,fp8,0,0.9069759845733643
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,16,1,128,1,float16,float16,0,0.7742506663004557
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,16,1,128,1,float16,fp8,0,0.8081386884053549
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,16,2,128,1,float16,float16,0,0.8061973253885905
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,16,2,128,1,float16,fp8,0,0.8178133169809977
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,16,4,128,1,float16,float16,0,0.8359200159708658
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,16,4,128,1,float16,fp8,0,0.8674933115641276
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,16,8,128,1,float16,float16,0,0.8356320063273112
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,16,8,128,1,float16,fp8,0,0.8809280395507812
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,16,1,128,1,float16,float16,0,8.094224294026693
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,16,1,128,1,float16,fp8,0,7.4559783935546875
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,16,2,128,1,float16,float16,0,7.782074610392253
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,16,2,128,1,float16,fp8,0,7.5908158620198565
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,16,4,128,1,float16,float16,0,8.614543914794922
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,16,4,128,1,float16,fp8,0,8.284629185994467
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,16,8,128,1,float16,float16,0,8.718965530395508
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,16,8,128,1,float16,fp8,0,8.257706960042318
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,16,1,128,1,float16,float16,0,3.8847198486328125
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,16,16,128,1,float16,float16,0,4.537413279215495
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,16,16,128,1,float16,fp8,0,4.349024136861165
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,16,1,128,1,float16,fp8,0,3.758357365926107
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,16,2,128,1,float16,float16,0,3.857680002848307
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,16,2,128,1,float16,fp8,0,3.835594813028971
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,16,4,128,1,float16,float16,0,4.138304074605306
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,16,4,128,1,float16,fp8,0,4.195333480834961
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,16,16,128,1,float16,float16,0,2.166874726613363
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,16,1,128,1,float16,float16,0,2.0137599309285483
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,16,1,128,1,float16,fp8,0,1.9064639409383137
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,16,16,128,1,float16,fp8,0,2.227973302205404
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,16,8,128,1,float16,float16,0,4.28334395090739
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,16,8,128,1,float16,fp8,0,4.300112088521321
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,16,2,128,1,float16,float16,0,1.8851733207702637
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,16,2,128,1,float16,fp8,0,1.9596586227416992
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,16,4,128,1,float16,float16,0,2.083573341369629
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,16,4,128,1,float16,fp8,0,2.2783360481262207
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,16,16,128,1,float16,float16,0,1.2119893232981365
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,16,8,128,1,float16,float16,0,2.114858627319336
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,16,16,128,1,float16,fp8,0,1.155509312947591
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,16,1,128,1,float16,float16,0,0.9621067047119141
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,16,8,128,1,float16,fp8,0,2.1815412839253745
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,16,1,128,1,float16,fp8,0,0.9955093065897623
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,16,2,128,1,float16,float16,0,0.9826986789703369
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,16,2,128,1,float16,fp8,0,1.0442240238189697
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,16,4,128,1,float16,float16,0,1.0688532988230388
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,16,4,128,1,float16,fp8,0,1.1547146638234456
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,16,8,128,1,float16,float16,0,1.0905866622924805
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,16,16,128,1,float16,float16,0,0.5897973378499349
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,16,1,128,1,float16,fp8,0,0.5365226666132609
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,16,8,128,1,float16,fp8,0,1.1340746879577637
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,16,16,128,1,float16,fp8,0,0.6214239994684855
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,16,1,128,1,float16,float16,0,0.5086933374404907
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,16,2,128,1,float16,float16,0,0.5259199937184652
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,16,2,128,1,float16,fp8,0,0.5542240142822266
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,16,4,128,1,float16,float16,0,0.5636746486028036
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,16,4,128,1,float16,fp8,0,0.5869919856389364
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,16,8,128,1,float16,float16,0,0.5792693297068278
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,16,8,128,1,float16,fp8,0,0.6113653182983398
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,16,1,128,1,float16,float16,0,4.45194149017334
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,16,1,128,1,float16,fp8,0,4.398847897847493
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,16,2,128,1,float16,float16,0,4.468117396036784
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,16,2,128,1,float16,fp8,0,4.460106531778972
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,16,4,128,1,float16,float16,0,5.016357421875
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,16,4,128,1,float16,fp8,0,5.001071929931641
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,16,8,128,1,float16,float16,0,4.978613217671712
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,16,16,128,1,float16,float16,0,2.5798560778299966
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,16,1,128,1,float16,float16,0,2.167407989501953
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,16,16,128,1,float16,fp8,0,2.8480428059895835
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,16,1,128,1,float16,fp8,0,2.3092427253723145
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,16,8,128,1,float16,fp8,0,5.088543891906738
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,16,2,128,1,float16,float16,0,2.188927968343099
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,16,2,128,1,float16,fp8,0,2.39030392964681
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,16,4,128,1,float16,float16,0,2.4616853396097818
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,16,4,128,1,float16,fp8,0,2.597482681274414
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,16,8,128,1,float16,float16,0,2.490837256113688
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,16,16,128,1,float16,float16,0,1.3261280059814453
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,16,8,128,1,float16,fp8,0,2.57586669921875
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,16,16,128,1,float16,fp8,0,1.4225813547770183
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,16,1,128,1,float16,float16,0,1.0886826515197754
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,16,1,128,1,float16,fp8,0,1.202741305033366
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,16,2,128,1,float16,float16,0,1.1203093528747559
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,16,2,128,1,float16,fp8,0,1.1943413416544597
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,16,4,128,1,float16,fp8,0,1.3152693112691243
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,16,4,128,1,float16,float16,0,1.264789342880249
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,16,8,128,1,float16,float16,0,1.2655786673227947
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,16,16,128,1,float16,float16,0,0.6981813112894694
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,16,8,128,1,float16,fp8,0,1.3241013685862224
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,16,16,128,1,float16,fp8,0,0.7231199741363525
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,16,2,128,1,float16,float16,0,0.5866453250249227
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,16,1,128,1,float16,float16,0,0.5813226699829102
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,16,1,128,1,float16,fp8,0,0.6024320125579834
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,16,2,128,1,float16,fp8,0,0.612394650777181
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,16,4,128,1,float16,float16,0,0.6515040000279745
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,16,8,128,1,float16,float16,0,0.6574826637903849
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,16,8,128,1,float16,fp8,0,0.69923202196757
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,16,16,128,1,float16,float16,0,0.3721333344777425
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,16,16,128,1,float16,fp8,0,0.39875733852386475
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,16,4,128,1,float16,fp8,0,0.6857600212097168
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,16,1,128,1,float16,float16,0,0.3129173318545024
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,16,1,128,1,float16,fp8,0,0.3253600001335144
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,16,2,128,1,float16,float16,0,0.3102560043334961
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,16,2,128,1,float16,fp8,0,0.3376106818517049
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,16,4,128,1,float16,float16,0,0.35364798704783124
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,16,4,128,1,float16,fp8,0,0.3776959975560506
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,16,8,128,1,float16,float16,0,0.3609813451766968
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,16,8,128,1,float16,fp8,0,0.38442134857177734
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,16,1,128,1,float16,float16,0,4.0529225667317705
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,16,1,128,1,float16,fp8,0,4.169072151184082
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,16,2,128,1,float16,float16,0,4.196042696634929
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,16,2,128,1,float16,fp8,0,4.261242548624675
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,16,4,128,1,float16,float16,0,4.960106531778972
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,16,4,128,1,float16,fp8,0,4.9823306401570635
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,16,8,128,1,float16,float16,0,5.001248041788737
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,16,8,128,1,float16,fp8,0,4.967930793762207
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,16,1,128,1,float16,float16,0,2.0466079711914062
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,16,2,128,1,float16,float16,0,2.1107892990112305
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,16,16,128,1,float16,float16,0,2.6099467277526855
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,16,16,128,1,float16,fp8,0,2.6980053583780923
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,16,1,128,1,float16,fp8,0,2.108703931172689
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,16,2,128,1,float16,fp8,0,2.1410826047261557
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,16,4,128,1,float16,float16,0,2.506826718648275
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,16,4,128,1,float16,fp8,0,2.5127414067586265
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,16,16,128,1,float16,float16,0,1.319258689880371
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,16,8,128,1,float16,float16,0,2.4439519246419272
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,16,16,128,1,float16,fp8,0,1.3884000778198242
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,16,1,128,1,float16,float16,0,1.018661340077718
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,16,8,128,1,float16,fp8,0,2.5868800481160483
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,16,1,128,1,float16,fp8,0,1.0692853132883708
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,16,2,128,1,float16,float16,0,1.0548266569773357
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,16,2,128,1,float16,fp8,0,1.0957012971242268
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,16,4,128,1,float16,float16,0,1.2436906496683757
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,16,4,128,1,float16,fp8,0,1.3147573471069336
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,16,8,128,1,float16,float16,0,1.239024003346761
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,16,2,128,1,float16,float16,0,0.5536213318506876
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,16,16,128,1,float16,float16,0,0.6762293179829916
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,16,1,128,1,float16,float16,0,0.5347679853439331
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,16,1,128,1,float16,fp8,0,0.5596959988276163
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,16,8,128,1,float16,fp8,0,1.2969760100046794
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,16,4,128,1,float16,fp8,0,0.6844000021616617
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,16,16,128,1,float16,fp8,0,0.7082933584849039
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,16,2,128,1,float16,fp8,0,0.5839413404464722
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,16,4,128,1,float16,float16,0,0.6306453148523966
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,16,8,128,1,float16,float16,0,0.6441760063171387
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,16,8,128,1,float16,fp8,0,0.6932853062947592
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,16,16,128,1,float16,float16,0,0.35576534271240234
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,16,16,128,1,float16,fp8,0,0.3839999834696452
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,16,1,128,1,float16,float16,0,0.287989338239034
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,16,1,128,1,float16,fp8,0,0.3052106698354085
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,16,2,128,1,float16,float16,0,0.2939573327700297
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,16,2,128,1,float16,fp8,0,0.309717337290446
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,16,4,128,1,float16,float16,0,0.3402773141860962
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,16,4,128,1,float16,fp8,0,0.3659040133158366
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,16,8,128,1,float16,float16,0,0.34329601128896076
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,16,8,128,1,float16,fp8,0,0.3747626543045044
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,16,16,128,1,float16,float16,0,0.20472532510757446
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,16,16,128,1,float16,fp8,0,0.22125866015752158
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,16,1,128,1,float16,float16,0,0.1580959955851237
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,16,1,128,1,float16,fp8,0,0.16425066192944845
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,16,2,128,1,float16,float16,0,0.1613920032978058
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,16,2,128,1,float16,fp8,0,0.1747573415438334
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,16,4,128,1,float16,float16,0,0.18967467546463013
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,16,8,128,1,float16,fp8,0,0.21348265806833902
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,16,4,128,1,float16,fp8,0,0.2025173306465149
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,16,8,128,1,float16,float16,0,0.19662932554880777
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,16,1,128,1,float16,float16,0,2.3528107007344565
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,16,2,128,1,float16,float16,0,2.474991957346598
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,16,1,128,1,float16,fp8,0,2.5118080774943032
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,16,2,128,1,float16,fp8,0,2.5624693234761557
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,16,4,128,1,float16,float16,0,2.987173398335775
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,16,4,128,1,float16,fp8,0,3.1442880630493164
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,16,8,128,1,float16,float16,0,3.058042526245117
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,16,16,128,1,float16,float16,0,1.6172213554382324
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,16,1,128,1,float16,float16,0,1.2031786441802979
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,16,16,128,1,float16,fp8,0,1.6964853604634602
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,16,2,128,1,float16,float16,0,1.243616024653117
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,16,1,128,1,float16,fp8,0,1.28766934076945
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,16,8,128,1,float16,fp8,0,3.1516586939493814
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,16,2,128,1,float16,fp8,0,1.3125706513722737
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,16,4,128,1,float16,float16,0,1.510207970937093
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,16,4,128,1,float16,fp8,0,1.5887680053710938
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,16,8,128,1,float16,float16,0,1.5332852999369304
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,16,16,128,1,float16,float16,0,0.8259147008260092
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,16,16,128,1,float16,fp8,0,0.8883626461029053
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,16,8,128,1,float16,fp8,0,1.6138134002685547
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,16,1,128,1,float16,float16,0,0.6268426577250162
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,16,1,128,1,float16,fp8,0,0.6584213177363077
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,16,2,128,1,float16,float16,0,0.6417813301086426
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,16,4,128,1,float16,float16,0,0.7723093032836914
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,16,2,128,1,float16,fp8,0,0.6764960289001465
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,16,4,128,1,float16,fp8,0,0.8320000171661377
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,16,8,128,1,float16,float16,0,0.7767999966939291
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,16,8,128,1,float16,fp8,0,0.8385706742604574
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,16,16,128,1,float16,float16,0,0.43321601549784344
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,16,16,128,1,float16,fp8,0,0.4655359983444214
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,16,1,128,1,float16,float16,0,0.3251733382542928
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,16,1,128,1,float16,fp8,0,0.34938132762908936
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,16,2,128,1,float16,float16,0,0.34140264987945557
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,16,2,128,1,float16,fp8,0,0.3640799919764201
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,16,4,128,1,float16,float16,0,0.40250666936238605
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,16,4,128,1,float16,fp8,0,0.4401013453801473
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,16,8,128,1,float16,float16,0,0.41520531972249347
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,16,8,128,1,float16,fp8,0,0.45367467403411865
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,16,16,128,1,float16,float16,0,0.23401067654291788
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,16,16,128,1,float16,fp8,0,0.2576319972674052
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,16,1,128,1,float16,float16,0,0.18041600783665976
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,16,1,128,1,float16,fp8,0,0.1937546730041504
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,16,2,128,1,float16,float16,0,0.1851093371709188
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,16,2,128,1,float16,fp8,0,0.19871467351913452
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,16,4,128,1,float16,float16,0,0.2227893273035685
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,16,4,128,1,float16,fp8,0,0.2429866592089335
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,16,8,128,1,float16,float16,0,0.22569066286087036
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,16,8,128,1,float16,fp8,0,0.24765866994857788
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,16,16,128,1,float16,float16,0,0.1365066667397817
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,16,16,128,1,float16,fp8,0,0.1520853340625763
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,16,1,128,1,float16,float16,0,0.09985599915186565
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,16,1,128,1,float16,fp8,0,0.1083679993947347
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,16,2,128,1,float16,float16,0,0.10607999563217163
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,16,2,128,1,float16,fp8,0,0.11735467116038005
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,16,4,128,1,float16,float16,0,0.1199733316898346
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,16,4,128,1,float16,fp8,0,0.13078932960828146
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,16,8,128,1,float16,float16,0,0.13103999694188437
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,16,8,128,1,float16,fp8,0,0.14050666491190592
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,16,1,128,1,float16,float16,0,2.316378593444824
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,16,1,128,1,float16,fp8,0,2.503941377003988
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,16,2,128,1,float16,float16,0,2.5015786488850913
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,16,2,128,1,float16,fp8,0,2.587274710337321
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,16,4,128,1,float16,float16,0,3.125962575276693
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,16,4,128,1,float16,fp8,0,3.320277214050293
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,16,8,128,1,float16,float16,0,3.1989920934041343
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,16,16,128,1,float16,float16,0,1.7201333045959473
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,16,8,128,1,float16,fp8,0,3.3646507263183594
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,16,1,128,1,float16,float16,0,1.1890880266825359
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,16,16,128,1,float16,fp8,0,1.8125227292378743
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,16,1,128,1,float16,fp8,0,1.2696586449940999
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,16,2,128,1,float16,float16,0,1.2534026304880779
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,16,2,128,1,float16,fp8,0,1.3074986934661865
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,16,4,128,1,float16,float16,0,1.5849332809448242
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,16,4,128,1,float16,fp8,0,1.6986932754516602
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,16,8,128,1,float16,float16,0,1.5919626553853352
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,16,16,128,1,float16,float16,0,0.8664693037668864
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,16,1,128,1,float16,float16,0,0.6033653418223063
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,16,8,128,1,float16,fp8,0,1.701514720916748
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,16,1,128,1,float16,fp8,0,0.6507146755854288
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,16,16,128,1,float16,fp8,0,0.9506933689117432
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,16,2,128,1,float16,float16,0,0.6516319910685221
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,16,2,128,1,float16,fp8,0,0.6848693688710531
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,16,4,128,1,float16,float16,0,0.803551991780599
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,16,4,128,1,float16,fp8,0,0.8700640201568604
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,16,8,128,1,float16,float16,0,0.8125173250834147
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,16,16,128,1,float16,float16,0,0.4530346790949504
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,16,8,128,1,float16,fp8,0,0.8773600260416666
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,16,16,128,1,float16,fp8,0,0.4975359837214152
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,16,1,128,1,float16,float16,0,0.3167146643002828
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,16,1,128,1,float16,fp8,0,0.34799468517303467
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,16,8,128,1,float16,float16,0,0.42934401830037433
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,16,2,128,1,float16,float16,0,0.3422880172729492
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,16,2,128,1,float16,fp8,0,0.35790932178497314
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,16,4,128,1,float16,float16,0,0.42102400461832684
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,16,16,128,1,float16,fp8,0,0.26525866985321045
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,16,4,128,1,float16,fp8,0,0.4582879940668742
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,16,8,128,1,float16,fp8,0,0.4655199845631917
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,16,16,128,1,float16,float16,0,0.2428213357925415
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,16,1,128,1,float16,float16,0,0.174127995967865
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,16,1,128,1,float16,fp8,0,0.1901866594950358
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,16,2,128,1,float16,float16,0,0.18665067354838052
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,16,2,128,1,float16,fp8,0,0.19851734240849814
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,16,4,128,1,float16,float16,0,0.22691200176874796
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,16,4,128,1,float16,fp8,0,0.24834134181340536
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,16,8,128,1,float16,float16,0,0.23210134108861288
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,16,8,128,1,float16,fp8,0,0.2550293405850728
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,16,16,128,1,float16,float16,0,0.1355946660041809
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,16,16,128,1,float16,fp8,0,0.15203733245531717
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,16,1,128,1,float16,float16,0,0.09649067123730977
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,16,1,128,1,float16,fp8,0,0.10499733686447144
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,16,2,128,1,float16,float16,0,0.10010133186976115
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,16,2,128,1,float16,fp8,0,0.11050666371981303
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,16,4,128,1,float16,float16,0,0.12659200032552084
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,16,8,128,1,float16,float16,0,0.130021333694458
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,16,4,128,1,float16,fp8,0,0.1359946628411611
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,16,8,128,1,float16,fp8,0,0.14402133226394653
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,16,16,128,1,float16,float16,0,0.08105599880218506
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,16,16,128,1,float16,fp8,0,0.08842133482297261
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,16,1,128,1,float16,float16,0,0.05643733342488607
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,16,1,128,1,float16,fp8,0,0.06275199850400288
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,16,4,128,1,float16,fp8,0,0.07858666777610779
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,16,2,128,1,float16,float16,0,0.06198399762312571
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,16,2,128,1,float16,fp8,0,0.07003733515739441
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,16,4,128,1,float16,float16,0,0.07034666836261749
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,16,8,128,1,float16,float16,0,0.07148799796899159
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,16,8,128,1,float16,fp8,0,0.0800799975792567
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,16,1,128,1,float16,float16,0,1.4603360493977864
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,16,1,128,1,float16,fp8,0,1.5755786895751953
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,16,2,128,1,float16,float16,0,1.5618400573730469
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,16,4,128,1,float16,float16,0,2.033514658610026
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,16,2,128,1,float16,fp8,0,1.6427626609802246
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,16,4,128,1,float16,fp8,0,2.2080586751302085
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,16,8,128,1,float16,float16,0,2.0819093386332193
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,16,16,128,1,float16,float16,0,1.1399412949879963
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,16,8,128,1,float16,fp8,0,2.1969067255655923
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,16,1,128,1,float16,fp8,0,0.8014026482899984
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,16,2,128,1,float16,float16,0,0.7870240211486816
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,16,4,128,1,float16,fp8,0,1.1308533350626628
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,16,1,128,1,float16,float16,0,0.7358880043029785
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,16,4,128,1,float16,float16,0,1.0295999844868977
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,16,2,128,1,float16,fp8,0,0.8363839785257975
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,16,8,128,1,float16,float16,0,1.0606826941172283
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,16,8,128,1,float16,fp8,0,1.1559253533681233
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,16,16,128,1,float16,float16,0,0.5784106651941935
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,16,16,128,1,float16,fp8,0,0.6420746644337972
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,16,1,128,1,float16,float16,0,0.3800426721572876
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,16,16,128,1,float16,fp8,0,1.2250080108642578
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,16,1,128,1,float16,fp8,0,0.4175413449605306
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,16,2,128,1,float16,float16,0,0.41541866461435956
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,16,2,128,1,float16,fp8,0,0.43592532475789386
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,16,4,128,1,float16,float16,0,0.5323413213094076
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,16,4,128,1,float16,fp8,0,0.5886346499125162
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,16,8,128,1,float16,float16,0,0.5475360155105591
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,16,8,128,1,float16,fp8,0,0.59661865234375
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,16,16,128,1,float16,fp8,0,0.3344959815343221
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,16,16,128,1,float16,float16,0,0.31387199958165485
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,16,1,128,1,float16,float16,0,0.20710400740305582
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,16,1,128,1,float16,fp8,0,0.22427199284235635
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,16,2,128,1,float16,float16,0,0.223471999168396
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,16,2,128,1,float16,fp8,0,0.23590399821599325
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,16,4,128,1,float16,float16,0,0.28118399779001874
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,16,4,128,1,float16,fp8,0,0.31062400341033936
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,16,8,128,1,float16,float16,0,0.2890133261680603
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,16,8,128,1,float16,fp8,0,0.32516799370447796
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,16,16,128,1,float16,float16,0,0.16672533750534058
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,16,16,128,1,float16,fp8,0,0.18702399730682373
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,16,1,128,1,float16,float16,0,0.11582932869593303
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,16,1,128,1,float16,fp8,0,0.12597333391507468
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,16,2,128,1,float16,float16,0,0.11786666512489319
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,16,2,128,1,float16,fp8,0,0.1279253363609314
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,16,4,128,1,float16,float16,0,0.1544373333454132
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,16,4,128,1,float16,fp8,0,0.17186667521794638
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,16,8,128,1,float16,float16,0,0.15822933117548624
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,16,8,128,1,float16,fp8,0,0.17890133460362753
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,16,16,128,1,float16,float16,0,0.09481066465377808
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,16,16,128,1,float16,fp8,0,0.10909866293271382
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,16,1,128,1,float16,float16,0,0.06391466657320659
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,16,1,128,1,float16,fp8,0,0.07167999943097432
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,16,2,128,1,float16,float16,0,0.06865066786607106
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,16,2,128,1,float16,fp8,0,0.07800533374150594
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,16,4,128,1,float16,float16,0,0.08056533336639404
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,16,4,128,1,float16,fp8,0,0.089519997437795
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,16,8,128,1,float16,float16,0,0.08971200386683147
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,16,8,128,1,float16,fp8,0,0.09769066174825032
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,16,16,128,1,float16,float16,0,0.05584000051021576
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,16,16,128,1,float16,fp8,0,0.06087466577688853
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,16,1,128,1,float16,float16,0,0.043338666359583534
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,16,1,128,1,float16,fp8,0,0.04863999783992767
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,16,2,128,1,float16,float16,0,0.043696001172065735
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,16,2,128,1,float16,fp8,0,0.04993600149949392
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,16,4,128,1,float16,float16,0,0.050111999114354454
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,16,4,128,1,float16,fp8,0,0.057189335425694786
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,16,8,128,1,float16,float16,0,0.05037866532802582
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,16,8,128,1,float16,fp8,0,0.05900266766548157
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,16,1,128,1,float16,float16,0,1.523189385732015
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,16,1,128,1,float16,fp8,0,1.6999786694844563
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,16,2,128,1,float16,float16,0,1.6533066431681316
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,16,2,128,1,float16,fp8,0,1.7697547276814778
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,16,4,128,1,float16,float16,0,2.3061973253885903
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,16,8,128,1,float16,float16,0,2.319039980570475
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,16,16,128,1,float16,float16,0,1.2994773387908936
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,16,4,128,1,float16,fp8,0,2.521397272745768
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,16,1,128,1,float16,float16,0,0.7803786595662435
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,16,16,128,1,float16,fp8,0,1.4800960222880046
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,16,8,128,1,float16,fp8,0,2.4909866650899253
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,16,2,128,1,float16,float16,0,0.8304159641265869
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,16,1,128,1,float16,fp8,0,0.8583947022755941
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,16,2,128,1,float16,fp8,0,0.9061813354492188
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,16,4,128,1,float16,float16,0,1.179909308751424
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,16,8,128,1,float16,float16,0,1.2065760294596355
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,16,4,128,1,float16,fp8,0,1.2976853052775066
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,16,8,128,1,float16,fp8,0,1.3376533190409343
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,16,16,128,1,float16,float16,0,0.6600213448206583
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,16,16,128,1,float16,fp8,0,0.7561439673105875
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,16,1,128,1,float16,float16,0,0.4043999910354614
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,16,2,128,1,float16,float16,0,0.4530080159505208
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,16,1,128,1,float16,fp8,0,0.44725334644317627
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,16,2,128,1,float16,fp8,0,0.46724800268809
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,16,4,128,1,float16,float16,0,0.5978879928588867
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,16,4,128,1,float16,fp8,0,0.6667626698811849
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,16,8,128,1,float16,float16,0,0.6155626773834229
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,16,8,128,1,float16,fp8,0,0.6899253527323405
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,16,16,128,1,float16,float16,0,0.3440053462982178
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,16,16,128,1,float16,fp8,0,0.39337066809336346
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,16,1,128,1,float16,float16,0,0.21426133314768472
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,16,2,128,1,float16,float16,0,0.2358293334643046
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,16,1,128,1,float16,fp8,0,0.236735999584198
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,16,2,128,1,float16,fp8,0,0.24856533606847128
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,16,4,128,1,float16,float16,0,0.3147786657015483
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,16,16,128,1,float16,fp8,0,0.21039466063181558
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,16,4,128,1,float16,fp8,0,0.35102399190266925
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,16,8,128,1,float16,float16,0,0.318234662214915
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,16,8,128,1,float16,fp8,0,0.3573919932047526
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,16,16,128,1,float16,float16,0,0.18378132581710815
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,16,1,128,1,float16,float16,0,0.11854400237401326
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,16,1,128,1,float16,fp8,0,0.13209600249926248
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,16,8,128,1,float16,float16,0,0.17330666383107504
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,16,2,128,1,float16,float16,0,0.1292586624622345
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,16,2,128,1,float16,fp8,0,0.13873599966367087
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,16,4,128,1,float16,float16,0,0.1689173380533854
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,16,4,128,1,float16,fp8,0,0.19097065925598145
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,16,8,128,1,float16,fp8,0,0.19906665881474814
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,16,16,128,1,float16,float16,0,0.10347732901573181
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,16,1,128,1,float16,float16,0,0.06638399759928386
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,16,16,128,1,float16,fp8,0,0.12113599975903828
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,16,1,128,1,float16,fp8,0,0.07419733206431071
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,16,2,128,1,float16,float16,0,0.06913599868615468
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,16,2,128,1,float16,fp8,0,0.07924800117810567
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,16,4,128,1,float16,float16,0,0.09398399790128072
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,16,4,128,1,float16,fp8,0,0.10028800368309021
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,16,8,128,1,float16,float16,0,0.09566932916641235
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,16,8,128,1,float16,fp8,0,0.10925867160161336
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,16,16,128,1,float16,float16,0,0.061018665631612144
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,16,16,128,1,float16,fp8,0,0.0688213308652242
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,16,1,128,1,float16,float16,0,0.04132800052563349
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,16,1,128,1,float16,fp8,0,0.047279998660087585
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,16,2,128,1,float16,float16,0,0.043354665239652
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,16,2,128,1,float16,fp8,0,0.05129066606362661
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,16,4,128,1,float16,float16,0,0.04956800242265066
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,16,4,128,1,float16,fp8,0,0.058005332946777344
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,16,8,128,1,float16,float16,0,0.050944000482559204
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,16,8,128,1,float16,fp8,0,0.0583840012550354
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,16,16,128,1,float16,float16,0,0.0314026673634847
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,16,16,128,1,float16,fp8,0,0.037087999284267426
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,16,1,128,1,float16,float16,0,0.026341333985328674
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,16,1,128,1,float16,fp8,0,0.029792000850041706
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,16,2,128,1,float16,float16,0,0.027093333502610523
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,16,2,128,1,float16,fp8,0,0.0314026673634847
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,16,4,128,1,float16,float16,0,0.03014400104681651
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,16,4,128,1,float16,fp8,0,0.03493333359559377
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,16,8,128,1,float16,float16,0,0.03027733415365219
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,16,8,128,1,float16,fp8,0,0.035418666899204254
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,16,1,128,1,float16,float16,0,1.1532639662424724
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,16,1,128,1,float16,fp8,0,1.3106239636739094
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,16,2,128,1,float16,float16,0,1.28001070022583
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,16,2,128,1,float16,fp8,0,1.3872532844543457
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,16,4,128,1,float16,float16,0,1.9348479906717937
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,16,16,128,1,float16,float16,0,1.108517328898112
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,16,16,128,1,float16,fp8,0,1.2623093128204346
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,16,4,128,1,float16,fp8,0,2.150874614715576
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,16,1,128,1,float16,float16,0,0.5823573271433512
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,16,1,128,1,float16,fp8,0,0.6726773579915365
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,16,2,128,1,float16,fp8,0,0.7071839968363444
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,16,2,128,1,float16,float16,0,0.6479039986928304
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,16,8,128,1,float16,float16,0,1.9907414118448894
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,16,8,128,1,float16,fp8,0,2.253882726033529
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,16,4,128,1,float16,float16,0,0.9818560282389323
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,16,16,128,1,float16,float16,0,0.5629920164744059
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,16,4,128,1,float16,fp8,0,1.1018133163452148
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,16,16,128,1,float16,fp8,0,0.6843520005544027
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,16,8,128,1,float16,float16,0,1.0082666873931885
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,16,8,128,1,float16,fp8,0,1.1132746537526448
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,16,2,128,1,float16,fp8,0,0.37001601854960126
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,16,2,128,1,float16,float16,0,0.35415999094645184
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,16,4,128,1,float16,float16,0,0.4984106620152791
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,16,4,128,1,float16,fp8,0,0.5669546524683634
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,16,1,128,1,float16,float16,0,0.30426132678985596
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,16,16,128,1,float16,float16,0,0.29233600695927936
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,16,8,128,1,float16,float16,0,0.5180960098902384
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,16,16,128,1,float16,fp8,0,0.35547200838724774
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,16,8,128,1,float16,fp8,0,0.6135626633961996
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,16,1,128,1,float16,float16,0,0.16109866897265115
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,16,1,128,1,float16,fp8,0,0.3487413326899211
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,16,1,128,1,float16,fp8,0,0.18747733036677042
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,16,2,128,1,float16,float16,0,0.18588799238204956
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,16,4,128,1,float16,fp8,0,0.30293333530426025
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,16,2,128,1,float16,fp8,0,0.19542400042215982
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,16,4,128,1,float16,float16,0,0.26198933521906537
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,16,8,128,1,float16,float16,0,0.2664639949798584
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,16,8,128,1,float16,fp8,0,0.31575467189153034
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,16,16,128,1,float16,float16,0,0.15442132949829102
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,16,1,128,1,float16,float16,0,0.0920960009098053
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,16,16,128,1,float16,fp8,0,0.18965866168340048
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,16,1,128,1,float16,fp8,0,0.10508267084757487
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,16,2,128,1,float16,float16,0,0.10179733236630757
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,16,4,128,1,float16,fp8,0,0.1628159979979197
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,16,2,128,1,float16,fp8,0,0.11013333002726237
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,16,8,128,1,float16,fp8,0,0.17545066277186075
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,16,16,128,1,float16,float16,0,0.08635200063387553
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,16,16,128,1,float16,fp8,0,0.10855467120806377
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,16,4,128,1,float16,float16,0,0.13966400424639383
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,16,8,128,1,float16,float16,0,0.14493333299954733
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,16,1,128,1,float16,float16,0,0.05189333359400431
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,16,1,128,1,float16,fp8,0,0.05704000095526377
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,16,2,128,1,float16,float16,0,0.05458133419354757
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,16,2,128,1,float16,fp8,0,0.06155733267466227
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,16,4,128,1,float16,float16,0,0.07778666913509369
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,16,4,128,1,float16,fp8,0,0.08469866712888081
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,16,8,128,1,float16,float16,0,0.08113066852092743
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,16,8,128,1,float16,fp8,0,0.09635200103123982
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,16,16,128,1,float16,float16,0,0.0495306650797526
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,16,16,128,1,float16,fp8,0,0.0553706685702006
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,16,2,128,1,float16,fp8,0,0.03905600061019262
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,16,1,128,1,float16,float16,0,0.031301334500312805
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,16,1,128,1,float16,fp8,0,0.03643200049797694
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,16,2,128,1,float16,float16,0,0.033674667278925575
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,16,4,128,1,float16,float16,0,0.03904533386230469
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,16,4,128,1,float16,fp8,0,0.04574933151404063
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,16,8,128,1,float16,fp8,0,0.050527999798456825
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,16,16,128,1,float16,float16,0,0.025061334172884624
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,16,8,128,1,float16,float16,0,0.04071466624736786
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,16,16,128,1,float16,fp8,0,0.03165333221356074
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,16,1,128,1,float16,float16,0,0.01995733380317688
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,16,1,128,1,float16,fp8,0,0.02456533412138621
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,16,2,128,1,float16,float16,0,0.021541332205136616
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,16,2,128,1,float16,fp8,0,0.025397333006064098
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,16,4,128,1,float16,float16,0,0.024325333535671234
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,16,4,128,1,float16,fp8,0,0.02918400118748347
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,16,8,128,1,float16,float16,0,0.02475733309984207
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,16,8,128,1,float16,fp8,0,0.03071466585000356
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,16,16,128,1,float16,float16,0,0.021503999829292297
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,16,16,128,1,float16,fp8,0,0.02743999908367793
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,16,1,128,1,float16,float16,0,0.019178666174411774
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,16,1,128,1,float16,fp8,0,0.02298133323589961
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,16,2,128,1,float16,float16,0,0.01951466624935468
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,16,2,128,1,float16,fp8,0,0.023423999547958374
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,16,4,128,1,float16,float16,0,0.021322667598724365
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,16,4,128,1,float16,fp8,0,0.02536533276240031
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,16,8,128,1,float16,float16,0,0.020928000410397846
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,16,8,128,1,float16,fp8,0,0.026885333160559338
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,16,1,128,1,float16,float16,0,0.4897013505299886
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,16,1,128,1,float16,fp8,0,0.5702773332595825
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,16,2,128,1,float16,float16,0,0.5638293425242106
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,16,2,128,1,float16,fp8,0,0.6095733245213827
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,16,16,128,1,float16,float16,0,0.517194668451945
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,16,16,128,1,float16,fp8,0,0.6057013273239136
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,16,4,128,1,float16,float16,0,0.889082670211792
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,16,4,128,1,float16,fp8,0,1.007754643758138
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,16,1,128,1,float16,float16,0,0.2546773354212443
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,16,1,128,1,float16,fp8,0,0.2993546724319458
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,16,8,128,1,float16,fp8,0,1.084181308746338
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,16,2,128,1,float16,float16,0,0.3073280056317647
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,16,2,128,1,float16,fp8,0,0.31753067175547284
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,16,8,128,1,float16,float16,0,0.9215199947357178
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,16,4,128,1,float16,float16,0,0.45047998428344727
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,16,16,128,1,float16,fp8,0,0.3091199994087219
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,16,8,128,1,float16,float16,0,0.46082667509714764
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,16,8,128,1,float16,fp8,0,0.5618400176366171
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,16,4,128,1,float16,fp8,0,0.5131200154622396
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,16,1,128,1,float16,float16,0,0.13713600238164267
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,16,2,128,1,float16,float16,0,0.15846932927767435
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,16,2,128,1,float16,fp8,0,0.17161067326863608
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,16,4,128,1,float16,float16,0,0.237445334593455
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,16,4,128,1,float16,fp8,0,0.2773226698239644
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,16,8,128,1,float16,fp8,0,0.2890186707178752
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,16,16,128,1,float16,float16,0,0.14249066511789957
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,16,8,128,1,float16,float16,0,0.23900800943374634
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,16,16,128,1,float16,float16,0,0.2685226599375407
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,16,1,128,1,float16,fp8,0,0.15843733151753744
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,16,16,128,1,float16,fp8,0,0.17221333583196005
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,16,1,128,1,float16,float16,0,0.07865066826343536
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,16,1,128,1,float16,fp8,0,0.09206400314966838
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,16,2,128,1,float16,float16,0,0.08891733487447102
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,16,2,128,1,float16,fp8,0,0.09699199597040813
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,16,4,128,1,float16,float16,0,0.12454400459925334
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,16,8,128,1,float16,float16,0,0.12917332847913107
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,16,4,128,1,float16,fp8,0,0.14797332882881165
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,16,16,128,1,float16,float16,0,0.0792799989382426
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,16,8,128,1,float16,fp8,0,0.15983466307322183
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,16,16,128,1,float16,fp8,0,0.09752532839775085
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,16,1,128,1,float16,float16,0,0.042090664307276406
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,16,1,128,1,float16,fp8,0,0.04930666585763296
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,16,2,128,1,float16,float16,0,0.045194665590922035
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,16,4,128,1,float16,fp8,0,0.07794133325417836
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,16,8,128,1,float16,float16,0,0.07145600020885468
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,16,2,128,1,float16,fp8,0,0.05203733344872793
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,16,4,128,1,float16,float16,0,0.0687253326177597
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,16,8,128,1,float16,fp8,0,0.08967467149098714
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,16,16,128,1,float16,float16,0,0.04322666426499685
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,16,16,128,1,float16,fp8,0,0.05126399795214335
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,16,1,128,1,float16,float16,0,0.025653332471847534
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,16,1,128,1,float16,fp8,0,0.030394665896892548
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,16,2,128,1,float16,float16,0,0.027653334041436512
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,16,2,128,1,float16,fp8,0,0.033071999748547874
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,16,4,128,1,float16,float16,0,0.03333866596221924
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,16,4,128,1,float16,fp8,0,0.04041066765785217
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,16,8,128,1,float16,float16,0,0.034058667719364166
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,16,8,128,1,float16,fp8,0,0.04382933179537455
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,16,16,128,1,float16,float16,0,0.02216000109910965
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,16,16,128,1,float16,fp8,0,0.02828799933195114
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,16,1,128,1,float16,float16,0,0.01757866640885671
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,16,1,128,1,float16,fp8,0,0.02045866722861926
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,16,2,128,1,float16,float16,0,0.01878400022784869
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,16,2,128,1,float16,fp8,0,0.02143466720978419
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,16,4,128,1,float16,float16,0,0.021386665602525074
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,16,8,128,1,float16,float16,0,0.0215786670645078
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,16,4,128,1,float16,fp8,0,0.025226667523384094
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,16,8,128,1,float16,fp8,0,0.02720533311367035
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,16,16,128,1,float16,float16,0,0.01884799947341283
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,16,16,128,1,float16,fp8,0,0.024133334557215374
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,16,1,128,1,float16,float16,0,0.016650666793187458
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,16,1,128,1,float16,fp8,0,0.020053333292404812
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,16,2,128,1,float16,float16,0,0.01682666689157486
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,16,2,128,1,float16,fp8,0,0.01931200052301089
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,16,4,128,1,float16,float16,0,0.018351999421914417
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,16,4,128,1,float16,fp8,0,0.021690666675567627
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,16,8,128,1,float16,float16,0,0.018437333405017853
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,16,8,128,1,float16,fp8,0,0.02272533377011617
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,16,16,128,1,float16,float16,0,0.01687466725707054
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,16,16,128,1,float16,fp8,0,0.02195200075705846
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,16,1,128,1,float16,fp8,0,0.019237333287795384
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,16,1,128,1,float16,float16,0,0.01732800031701724
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,16,2,128,1,float16,float16,0,0.016741332908471424
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,16,2,128,1,float16,fp8,0,0.018917333334684372
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,16,4,128,1,float16,float16,0,0.01703466723362605
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,16,4,128,1,float16,fp8,0,0.019466667125622433
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,16,8,128,1,float16,float16,0,0.01718933383623759
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,16,8,128,1,float16,fp8,0,0.02065066620707512
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,16,1,128,1,float16,float16,0,0.23886932929356894
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,16,2,128,1,float16,float16,0,0.26977600653966266
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,16,2,128,1,float16,fp8,0,0.27853333950042725
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,16,16,128,1,float16,float16,0,0.25438400109608966
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,16,16,128,1,float16,fp8,0,0.27993067105611164
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,16,1,128,1,float16,fp8,0,0.2694293260574341
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,16,4,128,1,float16,float16,0,0.4297120173772176
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,16,4,128,1,float16,fp8,0,0.48695464928944904
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,16,1,128,1,float16,float16,0,0.1309866706530253
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,16,8,128,1,float16,float16,0,0.4484320084253947
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,16,8,128,1,float16,fp8,0,0.508133331934611
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,16,1,128,1,float16,fp8,0,0.14895466963450113
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,16,2,128,1,float16,fp8,0,0.15157866477966309
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,16,2,128,1,float16,float16,0,0.14869333306948343
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,16,4,128,1,float16,float16,0,0.227567990620931
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,16,4,128,1,float16,fp8,0,0.2633919914563497
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,16,8,128,1,float16,float16,0,0.23127466440200806
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,16,16,128,1,float16,float16,0,0.1367573340733846
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,16,8,128,1,float16,fp8,0,0.2627466718355815
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,16,16,128,1,float16,fp8,0,0.15212800105412802
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,16,1,128,1,float16,float16,0,0.07580799857775371
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,16,1,128,1,float16,fp8,0,0.084197332461675
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,16,2,128,1,float16,float16,0,0.08042133351167043
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,16,2,128,1,float16,fp8,0,0.08460799853006999
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,16,4,128,1,float16,float16,0,0.12171733379364014
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,16,4,128,1,float16,fp8,0,0.14179199934005737
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,16,8,128,1,float16,float16,0,0.12494933605194092
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,16,16,128,1,float16,float16,0,0.07631466786066692
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,16,8,128,1,float16,fp8,0,0.1441920002301534
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,16,16,128,1,float16,fp8,0,0.08482666810353597
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,16,1,128,1,float16,float16,0,0.03821333249409994
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,16,1,128,1,float16,fp8,0,0.0415786678592364
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,16,2,128,1,float16,float16,0,0.040949332217375435
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,16,2,128,1,float16,fp8,0,0.04598399996757507
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,16,4,128,1,float16,float16,0,0.06676800052324931
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,16,4,128,1,float16,fp8,0,0.06930133203665416
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,16,8,128,1,float16,float16,0,0.069733331600825
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,16,8,128,1,float16,fp8,0,0.07972800234953563
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,16,16,128,1,float16,float16,0,0.04186133543650309
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,16,16,128,1,float16,fp8,0,0.043103997906049095
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,16,1,128,1,float16,float16,0,0.022757334013779957
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,16,1,128,1,float16,fp8,0,0.026474667092164356
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,16,2,128,1,float16,float16,0,0.025087999800841015
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,16,2,128,1,float16,fp8,0,0.028837333122889202
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,16,4,128,1,float16,float16,0,0.030181333422660828
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,16,4,128,1,float16,fp8,0,0.03617066641648611
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,16,1,128,1,float16,float16,0,0.015978666643301647
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,16,8,128,1,float16,float16,0,0.03188266605138779
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,16,8,128,1,float16,fp8,0,0.036474667489528656
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,16,16,128,1,float16,float16,0,0.02046400060256322
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,16,16,128,1,float16,fp8,0,0.023792001108328503
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,16,1,128,1,float16,fp8,0,0.01851733277241389
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,16,2,128,1,float16,float16,0,0.01720000058412552
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,16,2,128,1,float16,fp8,0,0.019946667055288952
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,16,4,128,1,float16,float16,0,0.01982933282852173
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,16,4,128,1,float16,fp8,0,0.023007998863856
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,16,8,128,1,float16,float16,0,0.019882666567961376
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,16,8,128,1,float16,fp8,0,0.02342933416366577
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,16,16,128,1,float16,float16,0,0.017184000462293625
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,16,16,128,1,float16,fp8,0,0.019925333559513092
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,16,1,128,1,float16,float16,0,0.015439999600251516
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,16,1,128,1,float16,fp8,0,0.01754666616519292
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,16,2,128,1,float16,float16,0,0.015066667149464289
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,16,2,128,1,float16,fp8,0,0.017664000391960144
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,16,4,128,1,float16,float16,0,0.01676799977819125
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,16,4,128,1,float16,fp8,0,0.01960533360640208
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,16,8,128,1,float16,float16,0,0.016986666868130367
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,16,8,128,1,float16,fp8,0,0.01995733380317688
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,16,16,128,1,float16,float16,0,0.01524266724785169
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,16,16,128,1,float16,fp8,0,0.017557332913080852
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,16,1,128,1,float16,float16,0,0.014767999450365702
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,16,1,128,1,float16,fp8,0,0.017456000049908955
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,16,2,128,1,float16,float16,0,0.014709333578745524
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,16,2,128,1,float16,fp8,0,0.017237332959969837
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,16,4,128,1,float16,float16,0,0.015471999843915304
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,16,1,128,1,float16,float16,0,0.014138666292031607
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,16,4,128,1,float16,fp8,0,0.017701332767804463
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,16,8,128,1,float16,float16,0,0.01525866612792015
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,16,8,128,1,float16,fp8,0,0.017605333278576534
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,16,16,128,1,float16,fp8,0,0.016496000190575916
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,16,16,128,1,float16,float16,0,0.01414399966597557
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,16,1,128,1,float16,fp8,0,0.016501333564519882
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,16,2,128,1,float16,float16,0,0.014271999398867289
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,16,2,128,1,float16,fp8,0,0.01695999999841054
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,16,4,128,1,float16,float16,0,0.01431999976436297
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,16,4,128,1,float16,fp8,0,0.016373333831628162
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,16,8,128,1,float16,float16,0,0.014890667051076889
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,16,8,128,1,float16,fp8,0,0.016490666816631954
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,16,1,128,1,float16,float16,0,0.1283146639664968
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,16,1,128,1,float16,fp8,0,0.1467359960079193
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,16,2,128,1,float16,float16,0,0.14406399925549826
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,16,2,128,1,float16,fp8,0,0.15010133385658264
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,16,4,128,1,float16,float16,0,0.22714134057362875
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,16,4,128,1,float16,fp8,0,0.2642186681429545
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,16,16,128,1,float16,float16,0,0.14893866578737894
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,16,16,128,1,float16,fp8,0,0.1476640005906423
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,16,8,128,1,float16,float16,0,0.2348960041999817
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,16,1,128,1,float16,float16,0,0.07543466488520305
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,16,1,128,1,float16,fp8,0,0.08431466420491536
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,16,8,128,1,float16,fp8,0,0.27135467529296875
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,16,2,128,1,float16,float16,0,0.0804319977760315
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,16,2,128,1,float16,fp8,0,0.08437333504358928
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,16,8,128,1,float16,float16,0,0.12521599729855856
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,16,4,128,1,float16,float16,0,0.12230400244394939
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,16,4,128,1,float16,fp8,0,0.14225600163141885
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,16,16,128,1,float16,float16,0,0.08157333234945933
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,16,8,128,1,float16,fp8,0,0.14285332957903543
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,16,16,128,1,float16,fp8,0,0.08217599987983704
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,16,1,128,1,float16,float16,0,0.03740799923737844
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,16,1,128,1,float16,fp8,0,0.04156800111134847
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,16,2,128,1,float16,float16,0,0.0408746674656868
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,16,2,128,1,float16,fp8,0,0.04574400186538696
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,16,4,128,1,float16,float16,0,0.06541866560777028
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,16,4,128,1,float16,fp8,0,0.06836266815662384
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,16,8,128,1,float16,float16,0,0.06923733154932658
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,16,1,128,1,float16,fp8,0,0.026261332134405773
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,16,8,128,1,float16,fp8,0,0.07897066573301952
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,16,16,128,1,float16,float16,0,0.04534933467706045
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,16,16,128,1,float16,fp8,0,0.03921066721280416
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,16,1,128,1,float16,float16,0,0.022474666436513264
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,16,2,128,1,float16,float16,0,0.024688000480333965
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,16,2,128,1,float16,fp8,0,0.02869333326816559
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,16,4,128,1,float16,float16,0,0.030320001145203907
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,16,4,128,1,float16,fp8,0,0.03591466695070267
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,16,8,128,1,float16,float16,0,0.031023999055226643
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,16,8,128,1,float16,fp8,0,0.03659733384847641
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,16,16,128,1,float16,float16,0,0.021914665897687275
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,16,16,128,1,float16,fp8,0,0.023269332945346832
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,16,1,128,1,float16,float16,0,0.016063999384641647
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,16,1,128,1,float16,fp8,0,0.01850133389234543
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,16,2,128,1,float16,float16,0,0.017125333348910015
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,16,2,128,1,float16,fp8,0,0.019653332730134327
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,16,4,128,1,float16,float16,0,0.019760000209013622
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,16,4,128,1,float16,fp8,0,0.023365333676338196
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,16,8,128,1,float16,float16,0,0.019717333217461903
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,16,8,128,1,float16,fp8,0,0.023311999936898548
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,16,16,128,1,float16,float16,0,0.015504000087579092
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,16,16,128,1,float16,fp8,0,0.016154666741689045
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,16,1,128,1,float16,float16,0,0.014970666418472925
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,16,1,128,1,float16,fp8,0,0.01791999985774358
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,16,2,128,1,float16,float16,0,0.015061333775520325
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,16,2,128,1,float16,fp8,0,0.017242666333913803
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,16,4,128,1,float16,float16,0,0.016645333419243496
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,16,4,128,1,float16,fp8,0,0.01952533299724261
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,16,8,128,1,float16,float16,0,0.01664000004529953
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,16,8,128,1,float16,fp8,0,0.019466667125622433
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,16,16,128,1,float16,float16,0,0.013354666531085968
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,16,16,128,1,float16,fp8,0,0.014384000251690546
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,16,1,128,1,float16,float16,0,0.014783999572197596
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,16,1,128,1,float16,fp8,0,0.017258666455745697
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,16,2,128,1,float16,float16,0,0.014922666052977243
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,16,2,128,1,float16,fp8,0,0.017136000096797943
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,16,4,128,1,float16,float16,0,0.015018666783968607
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,16,4,128,1,float16,fp8,0,0.017727999637524288
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,16,8,128,1,float16,float16,0,0.014885333677132925
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,16,8,128,1,float16,fp8,0,0.017498667041460674
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,16,16,128,1,float16,float16,0,0.01322666679819425
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,16,16,128,1,float16,fp8,0,0.013722666849692663
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,16,1,128,1,float16,float16,0,0.014122666170199713
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,16,1,128,1,float16,fp8,0,0.016303999970356624
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,16,2,128,1,float16,float16,0,0.014335999886194864
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,16,2,128,1,float16,fp8,0,0.015989333391189575
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,16,4,128,1,float16,float16,0,0.014373333503802618
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,16,4,128,1,float16,fp8,0,0.01639466608564059
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,16,8,128,1,float16,float16,0,0.014032000054915747
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,16,8,128,1,float16,fp8,0,0.016255999604860943
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,16,16,128,1,float16,float16,0,0.012191999703645706
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,16,16,128,1,float16,fp8,0,0.013050666699806849
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,16,1,128,1,float16,float16,0,0.013909333695967993
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,16,1,128,1,float16,fp8,0,0.01613333324591319
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,16,2,128,1,float16,float16,0,0.013776000589132309
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,16,2,128,1,float16,fp8,0,0.015728000551462173
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,16,4,128,1,float16,float16,0,0.013957332819700241
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,16,4,128,1,float16,fp8,0,0.01589866727590561
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,16,8,128,1,float16,float16,0,0.013760000467300415
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,16,8,128,1,float16,fp8,0,0.016042667130629223
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,16,1,128,1,float16,float16,0,0.0732426643371582
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,16,1,128,1,float16,fp8,0,0.08533866206804912
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,16,2,128,1,float16,float16,0,0.07934933404127757
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,16,2,128,1,float16,fp8,0,0.08496000369389851
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,16,4,128,1,float16,float16,0,0.12034133076667786
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,16,4,128,1,float16,fp8,0,0.13991999626159668
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,16,16,128,1,float16,float16,0,0.10681600371996562
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,16,16,128,1,float16,fp8,0,0.10751466949780782
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,16,8,128,1,float16,float16,0,0.13565867145856222
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,16,1,128,1,float16,float16,0,0.03726933399836222
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,16,2,128,1,float16,float16,0,0.040336000422636666
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,16,8,128,1,float16,fp8,0,0.1437013347943624
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,16,1,128,1,float16,fp8,0,0.04196266829967499
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,16,4,128,1,float16,float16,0,0.06638399759928386
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,16,2,128,1,float16,fp8,0,0.04604266583919525
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,16,4,128,1,float16,fp8,0,0.06795733173688252
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,16,8,128,1,float16,float16,0,0.0757066657145818
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,16,16,128,1,float16,float16,0,0.05804799993832906
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,16,8,128,1,float16,fp8,0,0.07613866527875264
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,16,16,128,1,float16,fp8,0,0.053264002005259194
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,16,2,128,1,float16,fp8,0,0.02884799987077713
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,16,1,128,1,float16,float16,0,0.022629333039124806
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,16,1,128,1,float16,fp8,0,0.026421333352724712
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,16,2,128,1,float16,float16,0,0.02480533222357432
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,16,4,128,1,float16,float16,0,0.030005333324273426
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,16,4,128,1,float16,fp8,0,0.03576533248027166
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,16,8,128,1,float16,float16,0,0.03491200009981791
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,16,8,128,1,float16,fp8,0,0.03610666592915853
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,16,16,128,1,float16,float16,0,0.028405333558718365
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,16,16,128,1,float16,fp8,0,0.0295413335164388
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,16,1,128,1,float16,float16,0,0.015941333025693893
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,16,1,128,1,float16,fp8,0,0.01860800012946129
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,16,2,128,1,float16,float16,0,0.016885332763195038
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,16,2,128,1,float16,fp8,0,0.01956266661485036
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,16,4,128,1,float16,float16,0,0.019541333119074505
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,16,4,128,1,float16,fp8,0,0.02319466571013133
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,16,8,128,1,float16,float16,0,0.021642667551835377
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,16,8,128,1,float16,fp8,0,0.022954667607943218
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,16,16,128,1,float16,float16,0,0.018906666586796444
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,16,16,128,1,float16,fp8,0,0.019909333437681198
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,16,1,128,1,float16,float16,0,0.015178666760524115
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,16,1,128,1,float16,fp8,0,0.017509333789348602
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,16,2,128,1,float16,float16,0,0.015002666662136713
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,16,2,128,1,float16,fp8,0,0.017450666675964992
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,16,16,128,1,float16,float16,0,0.013557333499193192
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,16,4,128,1,float16,float16,0,0.01655999943614006
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,16,4,128,1,float16,fp8,0,0.019658666104078293
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,16,2,128,1,float16,float16,0,0.014954666296641031
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,16,8,128,1,float16,float16,0,0.015173333386580149
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,16,8,128,1,float16,fp8,0,0.016106666376193363
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,16,16,128,1,float16,fp8,0,0.014229333649079004
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,16,1,128,1,float16,float16,0,0.01463466634353002
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,16,1,128,1,float16,fp8,0,0.017429333180189133
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,16,2,128,1,float16,fp8,0,0.01749333366751671
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,16,4,128,1,float16,float16,0,0.015226667126019796
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,16,4,128,1,float16,fp8,0,0.017893332988023758
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,16,8,128,1,float16,float16,0,0.01321600005030632
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,16,8,128,1,float16,fp8,0,0.01414399966597557
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,16,16,128,1,float16,float16,0,0.012725333372751871
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,16,16,128,1,float16,fp8,0,0.013823999712864557
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,16,1,128,1,float16,float16,0,0.01404800017674764
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,16,1,128,1,float16,fp8,0,0.016538667182127636
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,16,2,128,1,float16,float16,0,0.014229333649079004
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,16,2,128,1,float16,fp8,0,0.016069332758585613
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,16,4,128,1,float16,float16,0,0.014458666245142618
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,16,4,128,1,float16,fp8,0,0.0163680004576842
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,16,8,128,1,float16,float16,0,0.012800000607967377
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,16,8,128,1,float16,fp8,0,0.013749333719412485
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,16,16,128,1,float16,float16,0,0.01267733300725619
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,16,16,128,1,float16,fp8,0,0.01350933313369751
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,16,1,128,1,float16,float16,0,0.013983999689420065
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,16,1,128,1,float16,fp8,0,0.016016000260909397
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,16,2,128,1,float16,float16,0,0.013690666606028875
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,16,2,128,1,float16,fp8,0,0.016000000139077503
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,16,4,128,1,float16,float16,0,0.013557333499193192
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,16,4,128,1,float16,fp8,0,0.015957333147525787
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,16,8,128,1,float16,float16,0,0.013194666554530462
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,16,8,128,1,float16,fp8,0,0.012789333860079447
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,16,16,128,1,float16,float16,0,0.012202666451533636
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,16,16,128,1,float16,fp8,0,0.013258667041858038
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,16,1,128,1,float16,float16,0,0.013994666437307993
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,16,1,128,1,float16,fp8,0,0.015583999454975128
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,16,2,128,1,float16,float16,0,0.013354666531085968
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,16,2,128,1,float16,fp8,0,0.015658666690190632
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,16,4,128,1,float16,float16,0,0.013424000392357508
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,16,4,128,1,float16,fp8,0,0.015664000064134598
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,16,8,128,1,float16,float16,0,0.01201066623131434
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,16,8,128,1,float16,fp8,0,0.01240533341964086
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,16,1,128,1,float16,float16,0,0.03790933390458425
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,16,1,128,1,float16,fp8,0,0.040906667709350586
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,16,2,128,1,float16,float16,0,0.04035199930270513
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,16,2,128,1,float16,fp8,0,0.04527466495831808
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,16,4,128,1,float16,float16,0,0.07172266642252605
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,16,4,128,1,float16,fp8,0,0.06485333542029063
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,16,16,128,1,float16,float16,0,0.08361599842707317
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,16,8,128,1,float16,float16,0,0.10033599535624187
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,16,16,128,1,float16,fp8,0,0.07772266864776611
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,16,8,128,1,float16,fp8,0,0.10114133358001709
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,16,1,128,1,float16,float16,0,0.022672000030676525
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,16,1,128,1,float16,fp8,0,0.02624533325433731
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,16,2,128,1,float16,float16,0,0.024725332856178284
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,16,2,128,1,float16,fp8,0,0.028490667541821797
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,16,4,128,1,float16,float16,0,0.03334933271010717
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,16,4,128,1,float16,fp8,0,0.035690667728583016
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,16,8,128,1,float16,float16,0,0.04828266799449921
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,16,8,128,1,float16,fp8,0,0.04808000226815542
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,16,16,128,1,float16,float16,0,0.04099733382463455
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,16,16,128,1,float16,fp8,0,0.04190933207670847
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,16,1,128,1,float16,float16,0,0.01613866661985715
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,16,1,128,1,float16,fp8,0,0.018645333747069042
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,16,2,128,1,float16,float16,0,0.016864000509182613
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,16,2,128,1,float16,fp8,0,0.01933866615096728
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,16,4,128,1,float16,float16,0,0.021210665504137676
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,16,4,128,1,float16,fp8,0,0.02276266614596049
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,16,8,128,1,float16,float16,0,0.02811199923356374
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,16,8,128,1,float16,fp8,0,0.02900800108909607
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,16,16,128,1,float16,float16,0,0.025407999753952026
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,16,2,128,1,float16,fp8,0,0.017583999782800674
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,16,16,128,1,float16,fp8,0,0.025994665920734406
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,16,1,128,1,float16,fp8,0,0.017450666675964992
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,16,1,128,1,float16,float16,0,0.015279999623696009
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,16,2,128,1,float16,float16,0,0.01516266663869222
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,16,4,128,1,float16,float16,0,0.014997333288192749
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,16,4,128,1,float16,fp8,0,0.015696000307798386
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,16,8,128,1,float16,float16,0,0.01854933301607768
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,16,8,128,1,float16,fp8,0,0.019600000232458115
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,16,16,128,1,float16,float16,0,0.01728533332546552
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,16,2,128,1,float16,fp8,0,0.016735999534527462
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,16,16,128,1,float16,fp8,0,0.01777600000301997
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,16,1,128,1,float16,float16,0,0.014975999792416891
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,16,1,128,1,float16,fp8,0,0.017418666432301205
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,16,2,128,1,float16,float16,0,0.0144213338692983
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,16,4,128,1,float16,float16,0,0.013418667018413544
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,16,4,128,1,float16,fp8,0,0.013546666751305262
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,16,8,128,1,float16,float16,0,0.013594667116800943
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,16,8,128,1,float16,fp8,0,0.014015999933083853
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,16,16,128,1,float16,float16,0,0.013269333789745966
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,16,16,128,1,float16,fp8,0,0.013781332721312841
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,16,1,128,1,float16,float16,0,0.01404800017674764
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,16,1,128,1,float16,fp8,0,0.016501333564519882
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,16,2,128,1,float16,float16,0,0.013760000467300415
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,16,2,128,1,float16,fp8,0,0.01584533353646596
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,16,4,128,1,float16,float16,0,0.013125333935022354
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,16,4,128,1,float16,fp8,0,0.013210666676362356
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,16,8,128,1,float16,float16,0,0.013104000439246496
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,16,8,128,1,float16,fp8,0,0.013797332843144735
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,16,16,128,1,float16,float16,0,0.013007999708255133
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,16,16,128,1,float16,fp8,0,0.013701333353916803
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,16,1,128,1,float16,float16,0,0.014021333307027817
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,16,4,128,1,float16,fp8,0,0.012506666282812754
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,16,1,128,1,float16,fp8,0,0.016074666132529575
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,16,2,128,1,float16,float16,0,0.013450667262077332
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,16,2,128,1,float16,fp8,0,0.015589332828919092
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,16,4,128,1,float16,float16,0,0.012335999558369318
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,16,8,128,1,float16,float16,0,0.012634667257467905
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,16,8,128,1,float16,fp8,0,0.013536000003417334
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,16,16,128,1,float16,float16,0,0.012479999413092932
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,16,16,128,1,float16,fp8,0,0.013050666699806849
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,16,1,128,1,float16,float16,0,0.013493333011865616
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,16,1,128,1,float16,fp8,0,0.015717333803574245
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,16,2,128,1,float16,float16,0,0.013157332936922709
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,16,2,128,1,float16,fp8,0,0.015098666151364645
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,16,4,128,1,float16,float16,0,0.011941333611806234
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,16,4,128,1,float16,fp8,0,0.012341332932313284
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,16,8,128,1,float16,float16,0,0.0122079998254776
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,16,8,128,1,float16,fp8,0,0.012362666428089142
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,16,16,128,1,float16,float16,0,0.011994666109482447
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,16,16,128,1,float16,fp8,0,0.01258133351802826
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,16,1,128,1,float16,float16,0,0.013552000125249227
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,16,1,128,1,float16,fp8,0,0.01573866605758667
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,16,2,128,1,float16,float16,0,0.012991999586423239
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,16,2,128,1,float16,fp8,0,0.015157333264748255
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,16,4,128,1,float16,float16,0,0.01166933278242747
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,16,4,128,1,float16,fp8,0,0.012042666474978128
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,16,8,128,1,float16,float16,0,0.011663999408483505
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,16,8,128,1,float16,fp8,0,0.012069333344697952
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,12,12,128,1,float16,float16,0,6.040576299031575
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,12,1,128,1,float16,fp8,0,10.553818384806315
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,12,1,128,1,float16,float16,0,12.186912536621094
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,12,2,128,1,float16,fp8,0,10.701093037923178
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,12,2,128,1,float16,float16,0,11.964847564697266
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,12,4,128,1,float16,float16,0,12.220437367757162
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,12,4,128,1,float16,fp8,0,11.491252899169922
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,12,12,128,1,float16,fp8,0,5.9613602956136065
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,12,1,128,1,float16,float16,0,5.832815806070964
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,12,12,128,1,float16,float16,0,2.9994986852010093
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,12,1,128,1,float16,fp8,0,5.8160050710042315
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,12,2,128,1,float16,float16,0,5.799013137817383
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,12,4,128,1,float16,float16,0,6.079888025919597
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,12,4,128,1,float16,fp8,0,5.920554478963216
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,12,12,128,1,float16,fp8,0,3.038576126098633
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,12,2,128,1,float16,fp8,0,5.39634641011556
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,12,1,128,1,float16,float16,0,2.750901222229004
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,12,1,128,1,float16,fp8,0,2.8266026178995767
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,12,2,128,1,float16,float16,0,2.7782773971557617
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,12,2,128,1,float16,fp8,0,2.9647947947184243
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,12,4,128,1,float16,float16,0,3.001162528991699
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,12,12,128,1,float16,float16,0,1.559333324432373
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,12,12,128,1,float16,fp8,0,1.5822772979736328
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,12,1,128,1,float16,float16,0,1.4272640546162922
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,12,1,128,1,float16,fp8,0,1.444778601328532
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,12,4,128,1,float16,fp8,0,2.986757278442383
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,12,2,128,1,float16,float16,0,1.4675839742024739
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,12,2,128,1,float16,fp8,0,1.511013348897298
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,12,4,128,1,float16,float16,0,1.5315626462300618
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,12,4,128,1,float16,fp8,0,1.566175937652588
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,12,1,128,1,float16,float16,0,6.5966237386067705
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,12,12,128,1,float16,float16,0,3.5175253550211587
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,12,1,128,1,float16,fp8,0,6.120384216308594
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,12,12,128,1,float16,fp8,0,3.522869427998861
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,12,2,128,1,float16,float16,0,6.843231836954753
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,12,2,128,1,float16,fp8,0,6.241861343383789
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,12,4,128,1,float16,float16,0,6.943509419759114
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,12,4,128,1,float16,fp8,0,6.782506942749023
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,12,1,128,1,float16,float16,0,3.133861223856608
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,12,1,128,1,float16,fp8,0,3.199861208597819
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,12,2,128,1,float16,float16,0,3.182938575744629
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,12,2,128,1,float16,fp8,0,3.1718454360961914
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,12,12,128,1,float16,float16,0,1.8344747225443523
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,12,12,128,1,float16,fp8,0,1.8289546966552734
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,12,4,128,1,float16,float16,0,3.41155211130778
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,12,4,128,1,float16,fp8,0,3.4519678751627603
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,12,1,128,1,float16,float16,0,1.6132159233093262
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,12,1,128,1,float16,fp8,0,1.6331413586934407
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,12,2,128,1,float16,float16,0,1.655637264251709
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,12,2,128,1,float16,fp8,0,1.6245652834574382
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,12,4,128,1,float16,float16,0,1.727717399597168
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,12,1,128,1,float16,float16,0,0.8352266947428385
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,12,12,128,1,float16,float16,0,0.9555946985880533
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,12,4,128,1,float16,fp8,0,1.790229320526123
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,12,12,128,1,float16,fp8,0,0.9766080379486084
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,12,1,128,1,float16,fp8,0,0.8555306593577067
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,12,2,128,1,float16,float16,0,0.8745493094126383
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,12,2,128,1,float16,fp8,0,0.8652959664662679
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,12,4,128,1,float16,float16,0,0.907584031422933
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,12,4,128,1,float16,fp8,0,0.9850613276163737
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,12,1,128,1,float16,float16,0,4.468207995096843
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,12,1,128,1,float16,fp8,0,4.32862917582194
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,12,12,128,1,float16,float16,0,2.5107359886169434
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,12,2,128,1,float16,float16,0,4.433183987935384
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,12,2,128,1,float16,fp8,0,4.428688049316406
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,12,12,128,1,float16,fp8,0,2.557509263356527
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,12,4,128,1,float16,float16,0,4.961263974507649
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,12,4,128,1,float16,fp8,0,4.852506637573242
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,12,1,128,1,float16,float16,0,2.4269332885742188
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,12,1,128,1,float16,fp8,0,2.211381276448568
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,12,2,128,1,float16,float16,0,2.2548267046610513
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,12,2,128,1,float16,fp8,0,2.3410560290018716
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,12,12,128,1,float16,float16,0,1.4182292620340984
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,12,4,128,1,float16,float16,0,2.422544002532959
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,12,12,128,1,float16,fp8,0,1.328554630279541
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,12,1,128,1,float16,float16,0,1.146389325459798
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,12,1,128,1,float16,fp8,0,1.1549653212229412
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,12,4,128,1,float16,fp8,0,2.497605323791504
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,12,2,128,1,float16,float16,0,1.1725386778513591
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,12,2,128,1,float16,fp8,0,1.1908640066782634
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,12,4,128,1,float16,float16,0,1.2616746425628662
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,12,12,128,1,float16,float16,0,0.6895039876302084
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,12,1,128,1,float16,fp8,0,0.6211839914321899
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,12,4,128,1,float16,fp8,0,1.2980266412099202
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,12,12,128,1,float16,fp8,0,0.7425066630045573
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,12,1,128,1,float16,float16,0,0.5831679900487264
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,12,4,128,1,float16,fp8,0,0.6952266693115234
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,12,2,128,1,float16,float16,0,0.6185119946797689
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,12,2,128,1,float16,fp8,0,0.6371253331502279
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,12,4,128,1,float16,float16,0,0.6648480097452799
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,12,1,128,1,float16,float16,0,5.863157272338867
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,12,12,128,1,float16,float16,0,3.3071467081705728
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,12,1,128,1,float16,fp8,0,5.632426579793294
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,12,12,128,1,float16,fp8,0,3.3932374318440757
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,12,2,128,1,float16,float16,0,5.999253590901692
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,12,2,128,1,float16,fp8,0,5.751109441121419
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,12,4,128,1,float16,float16,0,6.668394724527995
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,12,4,128,1,float16,fp8,0,6.518208185831706
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,12,1,128,1,float16,float16,0,2.920720100402832
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,12,1,128,1,float16,fp8,0,2.856570561726888
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,12,2,128,1,float16,float16,0,2.968997319539388
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,12,12,128,1,float16,float16,0,1.7005066871643066
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,12,2,128,1,float16,fp8,0,3.012026786804199
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,12,4,128,1,float16,float16,0,3.176608085632324
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,12,4,128,1,float16,fp8,0,3.501551946004232
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,12,12,128,1,float16,fp8,0,2.002565383911133
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,12,1,128,1,float16,float16,0,1.4216586748758953
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,12,1,128,1,float16,fp8,0,1.5529279708862305
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,12,2,128,1,float16,float16,0,1.4963466326395671
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,12,2,128,1,float16,fp8,0,1.5329920450846355
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,12,4,128,1,float16,float16,0,1.631706714630127
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,12,12,128,1,float16,float16,0,0.8958720366160074
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,12,12,128,1,float16,fp8,0,0.9290986855824789
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,12,4,128,1,float16,fp8,0,1.7541866302490234
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,12,1,128,1,float16,float16,0,0.7461439768473307
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,12,1,128,1,float16,fp8,0,0.7688533465067545
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,12,2,128,1,float16,float16,0,0.7827253341674805
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,12,2,128,1,float16,fp8,0,0.8284053007761637
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,12,4,128,1,float16,float16,0,0.8674986362457275
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,12,4,128,1,float16,fp8,0,0.8988320032755533
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,12,12,128,1,float16,float16,0,0.47728534539540607
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,12,12,128,1,float16,fp8,0,0.5087786515553793
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,12,1,128,1,float16,float16,0,0.40132800738016766
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,12,2,128,1,float16,fp8,0,0.4397066831588745
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,12,1,128,1,float16,fp8,0,0.4212586482365926
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,12,2,128,1,float16,float16,0,0.4182346661885579
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,12,4,128,1,float16,float16,0,0.46376534303029376
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,12,4,128,1,float16,fp8,0,0.48849066098531085
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,12,1,128,1,float16,float16,0,3.245024045308431
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,12,1,128,1,float16,fp8,0,3.314469337463379
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,12,2,128,1,float16,float16,0,3.39029852549235
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,12,12,128,1,float16,float16,0,2.03602663675944
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,12,2,128,1,float16,fp8,0,3.4619468053181968
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,12,12,128,1,float16,fp8,0,2.0791519482930503
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,12,4,128,1,float16,float16,0,3.8452587127685547
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,12,4,128,1,float16,fp8,0,3.9941867192586265
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,12,1,128,1,float16,float16,0,1.666122595469157
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,12,1,128,1,float16,fp8,0,1.688576062520345
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,12,2,128,1,float16,float16,0,1.7369759877522786
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,12,2,128,1,float16,fp8,0,1.7476693789164226
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,12,12,128,1,float16,float16,0,1.0499359766642253
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,12,4,128,1,float16,float16,0,1.938261349995931
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,12,1,128,1,float16,float16,0,0.8896533648173014
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,12,12,128,1,float16,fp8,0,1.0902400016784668
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,12,4,128,1,float16,fp8,0,2.0264107386271157
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,12,1,128,1,float16,fp8,0,0.8813493251800537
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,12,2,128,1,float16,float16,0,0.9016746679941813
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,12,2,128,1,float16,fp8,0,0.9054186344146729
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,12,4,128,1,float16,float16,0,1.0098346869150798
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,12,12,128,1,float16,float16,0,0.5542773405710856
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,12,4,128,1,float16,fp8,0,1.0662133693695068
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,12,12,128,1,float16,fp8,0,0.5854560136795044
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,12,1,128,1,float16,float16,0,0.4415680170059204
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,12,1,128,1,float16,fp8,0,0.47219733397165936
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,12,2,128,1,float16,float16,0,0.47707200050354004
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,12,2,128,1,float16,fp8,0,0.4858826796213786
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,12,4,128,1,float16,float16,0,0.5299839973449707
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,12,4,128,1,float16,fp8,0,0.5738399823506674
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,12,12,128,1,float16,float16,0,0.3040906588236491
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,12,12,128,1,float16,fp8,0,0.32869333028793335
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,12,1,128,1,float16,float16,0,0.24579733610153198
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,12,1,128,1,float16,fp8,0,0.2619360089302063
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,12,2,128,1,float16,float16,0,0.2618559996287028
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,12,2,128,1,float16,fp8,0,0.2692799965540568
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,12,4,128,1,float16,float16,0,0.29420266548792523
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,12,4,128,1,float16,fp8,0,0.31489066282908124
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,12,1,128,1,float16,float16,0,3.016042709350586
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,12,1,128,1,float16,fp8,0,3.169557253519694
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,12,2,128,1,float16,float16,0,3.2473068237304688
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,12,12,128,1,float16,float16,0,2.0510560671488443
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,12,2,128,1,float16,fp8,0,3.321807861328125
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,12,12,128,1,float16,fp8,0,2.119743982950846
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,12,4,128,1,float16,float16,0,3.8727038701375327
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,12,1,128,1,float16,float16,0,1.528096040089925
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,12,4,128,1,float16,fp8,0,4.05895455678304
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,12,1,128,1,float16,fp8,0,1.5995786984761555
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,12,2,128,1,float16,float16,0,1.681946595509847
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,12,2,128,1,float16,fp8,0,1.7019306818644206
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,12,12,128,1,float16,float16,0,1.0557493368784587
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,12,4,128,1,float16,float16,0,1.9648426373799641
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,12,4,128,1,float16,fp8,0,2.076085408528646
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,12,1,128,1,float16,float16,0,0.7861706415812174
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,12,12,128,1,float16,fp8,0,1.1335840225219727
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,12,1,128,1,float16,fp8,0,0.8240479628245035
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,12,2,128,1,float16,float16,0,0.8876533508300781
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,12,2,128,1,float16,fp8,0,0.8801546891530355
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,12,4,128,1,float16,float16,0,0.9972426891326904
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,12,1,128,1,float16,float16,0,0.41177598635355633
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,12,12,128,1,float16,float16,0,0.5466506481170654
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,12,4,128,1,float16,fp8,0,1.0603893597920735
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,12,12,128,1,float16,fp8,0,0.5853386720021566
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,12,1,128,1,float16,fp8,0,0.43785067399342853
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,12,2,128,1,float16,float16,0,0.44623998800913495
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,12,2,128,1,float16,fp8,0,0.46671466032663983
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,12,4,128,1,float16,float16,0,0.5176426569620768
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,12,4,128,1,float16,fp8,0,0.5613280137379965
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,12,12,128,1,float16,float16,0,0.29847466945648193
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,12,12,128,1,float16,fp8,0,0.3192053238550822
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,12,1,128,1,float16,float16,0,0.22771199544270834
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,12,1,128,1,float16,fp8,0,0.24243199825286865
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,12,2,128,1,float16,float16,0,0.24858667453130087
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,12,2,128,1,float16,fp8,0,0.2553760011990865
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,12,4,128,1,float16,float16,0,0.2827999989191691
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,12,4,128,1,float16,fp8,0,0.3081973393758138
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,12,12,128,1,float16,float16,0,0.17189866304397583
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,12,12,128,1,float16,fp8,0,0.18649599949518839
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,12,1,128,1,float16,float16,0,0.1269813378651937
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,12,1,128,1,float16,fp8,0,0.13411200046539307
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,12,2,128,1,float16,float16,0,0.13822399576505026
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,12,2,128,1,float16,fp8,0,0.14577600359916687
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,12,4,128,1,float16,float16,0,0.15705066919326782
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,12,4,128,1,float16,fp8,0,0.16461867094039917
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,12,1,128,1,float16,float16,0,1.8263306617736816
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,12,1,128,1,float16,fp8,0,1.934127966562907
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,12,2,128,1,float16,float16,0,1.9584266344706218
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,12,2,128,1,float16,fp8,0,2.0362772941589355
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,12,12,128,1,float16,float16,0,1.3228373527526855
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,12,4,128,1,float16,float16,0,2.40283203125
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,12,4,128,1,float16,fp8,0,2.571135997772217
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,12,1,128,1,float16,float16,0,0.9241920312245687
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,12,1,128,1,float16,fp8,0,0.9760479927062988
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,12,12,128,1,float16,fp8,0,1.4116907119750977
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,12,2,128,1,float16,float16,0,1.0153066317240398
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,12,2,128,1,float16,fp8,0,1.0379520257314045
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,12,4,128,1,float16,float16,0,1.223749319712321
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,12,12,128,1,float16,float16,0,0.6710560321807861
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,12,4,128,1,float16,fp8,0,1.3065120379130046
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,12,12,128,1,float16,fp8,0,0.7187893390655518
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,12,2,128,1,float16,float16,0,0.5274879932403564
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,12,1,128,1,float16,float16,0,0.4787626663843791
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,12,1,128,1,float16,fp8,0,0.5133546590805054
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,12,2,128,1,float16,fp8,0,0.5499733289082845
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,12,4,128,1,float16,float16,0,0.6311786572138468
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,12,12,128,1,float16,float16,0,0.3556906779607137
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,12,2,128,1,float16,float16,0,0.2837760051091512
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,12,4,128,1,float16,fp8,0,0.6883573532104492
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,12,12,128,1,float16,fp8,0,0.38357333342234295
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,12,1,128,1,float16,float16,0,0.25684799750645954
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,12,1,128,1,float16,fp8,0,0.28013867139816284
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,12,2,128,1,float16,fp8,0,0.2978453238805135
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,12,4,128,1,float16,float16,0,0.33873601754506427
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,12,12,128,1,float16,float16,0,0.19760533173878989
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,12,4,128,1,float16,fp8,0,0.3718026479085286
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,12,12,128,1,float16,fp8,0,0.21753599246342978
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,12,1,128,1,float16,float16,0,0.14246933658917746
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,12,1,128,1,float16,fp8,0,0.1530080040295919
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,12,2,128,1,float16,float16,0,0.16316266854604086
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,12,2,128,1,float16,fp8,0,0.1662826637427012
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,12,4,128,1,float16,float16,0,0.18710400660832724
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,12,4,128,1,float16,fp8,0,0.20389866828918457
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,12,2,128,1,float16,float16,0,0.09121599793434143
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,12,12,128,1,float16,float16,0,0.11675199866294861
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,12,12,128,1,float16,fp8,0,0.13038399815559387
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,12,1,128,1,float16,float16,0,0.0848426620165507
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,12,1,128,1,float16,fp8,0,0.09537600477536519
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,12,2,128,1,float16,fp8,0,0.09959999720255534
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,12,4,128,1,float16,float16,0,0.10106133421262105
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,12,4,128,1,float16,fp8,0,0.11212266484896342
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,12,1,128,1,float16,float16,0,1.7995200157165527
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,12,1,128,1,float16,fp8,0,1.9120160738627117
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,12,2,128,1,float16,float16,0,1.9389972686767578
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,12,4,128,1,float16,float16,0,2.6220800081888833
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,12,12,128,1,float16,float16,0,1.411824067433675
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,12,1,128,1,float16,float16,0,0.9143839677174886
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,12,12,128,1,float16,fp8,0,1.4871946970621746
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,12,2,128,1,float16,fp8,0,2.0719626744588218
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,12,4,128,1,float16,fp8,0,2.7944908142089844
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,12,1,128,1,float16,fp8,0,0.9751573403676351
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,12,2,128,1,float16,float16,0,0.996293306350708
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,12,2,128,1,float16,fp8,0,1.0549226601918538
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,12,12,128,1,float16,float16,0,0.7190399964650472
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,12,4,128,1,float16,float16,0,1.3140106995900471
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,12,4,128,1,float16,fp8,0,1.4315733909606934
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,12,1,128,1,float16,float16,0,0.46956801414489746
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,12,1,128,1,float16,fp8,0,0.5053653319676717
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,12,2,128,1,float16,float16,0,0.5337226788202921
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,12,12,128,1,float16,fp8,0,0.7762719790140787
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,12,4,128,1,float16,float16,0,0.6663039922714233
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,12,2,128,1,float16,fp8,0,0.5471466779708862
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,12,4,128,1,float16,fp8,0,0.7337706883748373
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,12,12,128,1,float16,float16,0,0.37674665451049805
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,12,12,128,1,float16,fp8,0,0.41140798727671307
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,12,1,128,1,float16,float16,0,0.2517919937769572
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,12,1,128,1,float16,fp8,0,0.27086400985717773
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,12,2,128,1,float16,float16,0,0.28357332944869995
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,12,2,128,1,float16,fp8,0,0.29130132993062335
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,12,4,128,1,float16,float16,0,0.3541066646575928
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,12,4,128,1,float16,fp8,0,0.39168532689412433
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,12,12,128,1,float16,float16,0,0.2039573391278585
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,12,2,128,1,float16,fp8,0,0.16452800234158835
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,12,12,128,1,float16,fp8,0,0.22672533988952637
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,12,1,128,1,float16,float16,0,0.14168000221252441
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,12,1,128,1,float16,fp8,0,0.15129066507021585
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,12,2,128,1,float16,float16,0,0.1599573294321696
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,12,4,128,1,float16,float16,0,0.19399466117223105
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,12,4,128,1,float16,fp8,0,0.2130933403968811
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,12,12,128,1,float16,float16,0,0.11566932996114095
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,12,12,128,1,float16,fp8,0,0.12962133685747781
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,12,1,128,1,float16,float16,0,0.07876266539096832
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,12,1,128,1,float16,fp8,0,0.08637332916259766
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,12,4,128,1,float16,fp8,0,0.11223999659220378
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,12,2,128,1,float16,float16,0,0.08549867073694865
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,12,2,128,1,float16,fp8,0,0.09319999814033508
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,12,4,128,1,float16,float16,0,0.10419199864069621
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,12,12,128,1,float16,float16,0,0.06663466493288676
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,12,12,128,1,float16,fp8,0,0.07302933434645335
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,12,1,128,1,float16,float16,0,0.05156266689300537
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,12,1,128,1,float16,fp8,0,0.05680533250172933
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,12,2,128,1,float16,float16,0,0.05372266471385956
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,12,2,128,1,float16,fp8,0,0.060229331254959106
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,12,4,128,1,float16,float16,0,0.06157866617043813
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,12,4,128,1,float16,fp8,0,0.07131733496983846
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,12,1,128,1,float16,float16,0,1.118341366449992
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,12,1,128,1,float16,fp8,0,1.218511978785197
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,12,2,128,1,float16,float16,0,1.248645305633545
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,12,4,128,1,float16,float16,0,1.7260053952534993
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,12,12,128,1,float16,float16,0,0.9462933540344238
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,12,2,128,1,float16,fp8,0,1.3354986508687336
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,12,4,128,1,float16,fp8,0,1.89519468943278
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,12,12,128,1,float16,fp8,0,1.0270933310190837
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,12,1,128,1,float16,float16,0,0.571178674697876
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,12,1,128,1,float16,fp8,0,0.6231626669565836
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,12,2,128,1,float16,float16,0,0.6596533457438151
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,12,2,128,1,float16,fp8,0,0.6845440069834391
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,12,4,128,1,float16,float16,0,0.8640639781951904
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,12,4,128,1,float16,fp8,0,0.972879966100057
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,12,12,128,1,float16,float16,0,0.48585601647694904
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,12,12,128,1,float16,fp8,0,0.5419253508249918
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,12,1,128,1,float16,float16,0,0.3001493414243062
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,12,1,128,1,float16,fp8,0,0.3291306694348653
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,12,2,128,1,float16,fp8,0,0.35903998215993244
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,12,2,128,1,float16,float16,0,0.34696535269419354
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,12,4,128,1,float16,float16,0,0.45285332202911377
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,12,12,128,1,float16,float16,0,0.2577600081761678
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,12,4,128,1,float16,fp8,0,0.509333332379659
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,12,1,128,1,float16,float16,0,0.16590933005015054
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,12,12,128,1,float16,fp8,0,0.29048534234364826
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,12,1,128,1,float16,fp8,0,0.18051733573277792
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,12,2,128,1,float16,fp8,0,0.19814932346343994
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,12,4,128,1,float16,float16,0,0.24041066567103067
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,12,2,128,1,float16,float16,0,0.19061867396036783
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,12,4,128,1,float16,fp8,0,0.27830400069554645
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,12,12,128,1,float16,float16,0,0.1428053379058838
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,12,12,128,1,float16,fp8,0,0.16179200013478598
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,12,1,128,1,float16,float16,0,0.09284800291061401
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,12,1,128,1,float16,fp8,0,0.10074667135874431
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,12,2,128,1,float16,float16,0,0.10559466481208801
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,12,2,128,1,float16,fp8,0,0.10916266838709514
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,12,4,128,1,float16,float16,0,0.13115200400352478
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,12,4,128,1,float16,fp8,0,0.1509226659933726
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,12,12,128,1,float16,float16,0,0.08261333405971527
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,12,12,128,1,float16,fp8,0,0.09061866998672485
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,12,1,128,1,float16,float16,0,0.05412266651789347
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,12,1,128,1,float16,fp8,0,0.06266666452089946
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,12,2,128,1,float16,float16,0,0.05989866455396017
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,12,2,128,1,float16,fp8,0,0.06619733572006226
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,12,4,128,1,float16,float16,0,0.06857599814732869
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,12,4,128,1,float16,fp8,0,0.07995200157165527
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,12,12,128,1,float16,float16,0,0.04638933142026266
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,12,12,128,1,float16,fp8,0,0.05525333185990652
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,12,1,128,1,float16,float16,0,0.03878933439652125
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,12,1,128,1,float16,fp8,0,0.044394666949907936
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,12,2,128,1,float16,float16,0,0.040181333820025124
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,12,2,128,1,float16,fp8,0,0.04671466847260793
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,12,4,128,1,float16,float16,0,0.04377600053946177
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,12,4,128,1,float16,fp8,0,0.05161066850026449
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,12,1,128,1,float16,float16,0,1.3683573404947917
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,12,1,128,1,float16,fp8,0,1.4380639394124348
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,12,2,128,1,float16,float16,0,1.5331199963887532
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,12,12,128,1,float16,float16,0,1.1825706958770752
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,12,2,128,1,float16,fp8,0,1.6035787264506023
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,12,12,128,1,float16,fp8,0,1.2813013394673665
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,12,1,128,1,float16,float16,0,0.7000746726989746
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,12,1,128,1,float16,fp8,0,0.7341439723968506
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,12,2,128,1,float16,float16,0,0.7916053136189779
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,12,4,128,1,float16,float16,0,2.1550505956014
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,12,4,128,1,float16,fp8,0,2.350581328074137
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,12,4,128,1,float16,float16,0,1.0950133005777996
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,12,2,128,1,float16,fp8,0,0.817525307337443
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,12,12,128,1,float16,float16,0,0.6046080191930135
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,12,4,128,1,float16,fp8,0,1.2021066347757976
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,12,12,128,1,float16,fp8,0,0.6758613586425781
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,12,1,128,1,float16,float16,0,0.3604319890340169
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,12,4,128,1,float16,float16,0,0.5502080122629801
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,12,12,128,1,float16,fp8,0,0.34777601559956867
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,12,1,128,1,float16,float16,0,0.193615992863973
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,12,2,128,1,float16,fp8,0,0.4235039949417114
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,12,1,128,1,float16,fp8,0,0.38096535205841064
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,12,12,128,1,float16,float16,0,0.31597866614659625
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,12,4,128,1,float16,fp8,0,0.6137866576512655
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,12,2,128,1,float16,fp8,0,0.2268106738726298
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,12,1,128,1,float16,fp8,0,0.20320000251134238
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,12,2,128,1,float16,float16,0,0.22340265909830728
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,12,2,128,1,float16,float16,0,0.43273067474365234
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,12,4,128,1,float16,float16,0,0.29363733530044556
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,12,12,128,1,float16,float16,0,0.16830933094024658
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,12,4,128,1,float16,fp8,0,0.32928532361984253
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,12,12,128,1,float16,fp8,0,0.19285333156585693
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,12,1,128,1,float16,float16,0,0.10805333654085796
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,12,1,128,1,float16,fp8,0,0.11390399932861328
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,12,2,128,1,float16,float16,0,0.12572800119717917
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,12,2,128,1,float16,fp8,0,0.12666133046150208
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,12,4,128,1,float16,fp8,0,0.17748266458511353
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,12,4,128,1,float16,float16,0,0.15948266784350076
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,12,1,128,1,float16,fp8,0,0.06392000118891399
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,12,12,128,1,float16,float16,0,0.09479999542236328
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,12,12,128,1,float16,fp8,0,0.107424000898997
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,12,1,128,1,float16,float16,0,0.05890666445096334
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,12,2,128,1,float16,float16,0,0.0643146683772405
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,12,2,128,1,float16,fp8,0,0.06931200126806895
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,12,4,128,1,float16,float16,0,0.08161599934101105
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,12,4,128,1,float16,fp8,0,0.08996267120043437
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,12,12,128,1,float16,float16,0,0.05086933573087057
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,12,12,128,1,float16,fp8,0,0.05659733215967814
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,12,1,128,1,float16,float16,0,0.037733333806196846
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,12,1,128,1,float16,fp8,0,0.04070399949947993
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,12,2,128,1,float16,float16,0,0.04160533348719279
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,12,2,128,1,float16,fp8,0,0.045552000403404236
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,12,4,128,1,float16,float16,0,0.04668800036112467
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,12,1,128,1,float16,fp8,0,0.028922667105992634
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,12,4,128,1,float16,fp8,0,0.05383466680844625
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,12,12,128,1,float16,float16,0,0.031173333525657654
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,12,2,128,1,float16,fp8,0,0.03068800022204717
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,12,12,128,1,float16,fp8,0,0.035930665830771126
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,12,1,128,1,float16,float16,0,0.026261332134405773
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,12,2,128,1,float16,float16,0,0.027717334528764088
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,12,4,128,1,float16,float16,0,0.03031466652949651
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,12,4,128,1,float16,fp8,0,0.03516799956560135
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,12,1,128,1,float16,float16,0,0.8994186719258627
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,12,1,128,1,float16,fp8,0,1.0251146952311199
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,12,2,128,1,float16,float16,0,1.1170720259348552
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,12,12,128,1,float16,float16,0,0.9520213603973389
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,12,2,128,1,float16,fp8,0,1.1982826391855876
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,12,1,128,1,float16,float16,0,0.4615306854248047
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,12,4,128,1,float16,float16,0,1.691098690032959
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,12,12,128,1,float16,fp8,0,1.1166293621063232
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,12,1,128,1,float16,fp8,0,0.5258026520411173
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,12,4,128,1,float16,fp8,0,1.9448960622151692
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,12,2,128,1,float16,float16,0,0.5683199961980184
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,12,2,128,1,float16,fp8,0,0.6220266819000244
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,12,4,128,1,float16,float16,0,0.8613226413726807
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,12,12,128,1,float16,fp8,0,0.5875093142191569
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,12,1,128,1,float16,float16,0,0.24064000447591147
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,12,4,128,1,float16,fp8,0,1.0023307005564372
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,12,12,128,1,float16,float16,0,0.4848426580429077
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,12,2,128,1,float16,float16,0,0.3091626763343811
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,12,1,128,1,float16,fp8,0,0.27741867303848267
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,12,2,128,1,float16,fp8,0,0.32223467032114667
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,12,4,128,1,float16,float16,0,0.4326666593551636
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,12,12,128,1,float16,float16,0,0.25166932741800946
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,12,4,128,1,float16,fp8,0,0.5147413412729899
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,12,1,128,1,float16,float16,0,0.1325813333193461
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,12,12,128,1,float16,fp8,0,0.30747199058532715
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,12,2,128,1,float16,float16,0,0.16554133097330728
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,12,1,128,1,float16,fp8,0,0.15038399895032248
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,12,2,128,1,float16,fp8,0,0.17428799470265707
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,12,4,128,1,float16,float16,0,0.22989867130915323
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,12,4,128,1,float16,fp8,0,0.2714986602465312
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,12,12,128,1,float16,float16,0,0.13431466619173685
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,12,12,128,1,float16,fp8,0,0.1663093368212382
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,12,1,128,1,float16,float16,0,0.07603733241558075
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,12,1,128,1,float16,fp8,0,0.0851039985815684
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,12,2,128,1,float16,float16,0,0.09242133299509685
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,12,2,128,1,float16,fp8,0,0.10005866487820943
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,12,4,128,1,float16,float16,0,0.12306666374206543
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,12,4,128,1,float16,fp8,0,0.1497813363869985
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,12,12,128,1,float16,float16,0,0.07598400115966797
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,12,12,128,1,float16,fp8,0,0.0941493312517802
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,12,1,128,1,float16,float16,0,0.04159999887148539
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,12,1,128,1,float16,fp8,0,0.04996799925963084
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,12,2,128,1,float16,float16,0,0.047024001677831016
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,12,2,128,1,float16,fp8,0,0.05505066613356272
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,12,4,128,1,float16,float16,0,0.06559466818968455
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,12,4,128,1,float16,fp8,0,0.07250666618347168
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,12,12,128,1,float16,float16,0,0.04035199930270513
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,12,12,128,1,float16,fp8,0,0.04854933420817057
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,12,1,128,1,float16,float16,0,0.028351999819278717
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,12,1,128,1,float16,fp8,0,0.03421866645415624
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,12,2,128,1,float16,float16,0,0.0305226668715477
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,12,2,128,1,float16,fp8,0,0.03828266759713491
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,12,4,128,1,float16,float16,0,0.036159999668598175
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,12,4,128,1,float16,fp8,0,0.044853334625562034
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,12,12,128,1,float16,float16,0,0.02480533222357432
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,12,12,128,1,float16,fp8,0,0.03088533381621043
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,12,1,128,1,float16,float16,0,0.019968000551064808
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,12,1,128,1,float16,fp8,0,0.02386666586001714
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,12,2,128,1,float16,float16,0,0.021146667500336964
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,12,2,128,1,float16,fp8,0,0.026672000686327618
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,12,4,128,1,float16,float16,0,0.023984000086784363
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,12,4,128,1,float16,fp8,0,0.03052799900372823
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,12,12,128,1,float16,float16,0,0.02146666745344798
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,12,12,128,1,float16,fp8,0,0.026837334036827087
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,12,1,128,1,float16,float16,0,0.019248000035683315
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,12,1,128,1,float16,fp8,0,0.02256533255179723
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,12,2,128,1,float16,float16,0,0.020031999796628952
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,12,2,128,1,float16,fp8,0,0.024693332612514496
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,12,4,128,1,float16,float16,0,0.020960000654061634
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,12,4,128,1,float16,fp8,0,0.026661333938439686
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,12,1,128,1,float16,float16,0,0.3901866674423218
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,12,1,128,1,float16,fp8,0,0.454095999399821
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,12,2,128,1,float16,fp8,0,0.5395253499348959
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,12,2,128,1,float16,float16,0,0.4863893191019694
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,12,1,128,1,float16,float16,0,0.20507200558980307
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,12,12,128,1,float16,fp8,0,0.5261439879735311
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,12,12,128,1,float16,float16,0,0.449130654335022
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,12,4,128,1,float16,fp8,0,0.9029813607533773
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,12,4,128,1,float16,float16,0,0.7833279768625895
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,12,1,128,1,float16,fp8,0,0.23869333664576212
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,12,2,128,1,float16,fp8,0,0.28594134251276654
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,12,2,128,1,float16,float16,0,0.27458133300145465
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,12,4,128,1,float16,float16,0,0.3999733527501424
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,12,1,128,1,float16,fp8,0,0.1302079955736796
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,12,1,128,1,float16,float16,0,0.1130506694316864
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,12,2,128,1,float16,float16,0,0.14293866356213888
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,12,12,128,1,float16,float16,0,0.23440533876419067
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,12,2,128,1,float16,fp8,0,0.15531733632087708
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,12,4,128,1,float16,float16,0,0.21075733502705893
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,12,12,128,1,float16,fp8,0,0.27086400985717773
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,12,4,128,1,float16,fp8,0,0.4723466634750366
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,12,4,128,1,float16,fp8,0,0.2556533416112264
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,12,12,128,1,float16,float16,0,0.1253973344961802
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,12,12,128,1,float16,fp8,0,0.15202132860819498
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,12,1,128,1,float16,fp8,0,0.07546666761239369
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,12,1,128,1,float16,float16,0,0.0645653357108434
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,12,4,128,1,float16,float16,0,0.11265599727630615
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,12,2,128,1,float16,float16,0,0.0822026679913203
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,12,2,128,1,float16,fp8,0,0.08953600128491719
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,12,4,128,1,float16,fp8,0,0.14034133156140646
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,12,12,128,1,float16,float16,0,0.06969066460927327
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,12,12,128,1,float16,fp8,0,0.0844053328037262
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,12,1,128,1,float16,float16,0,0.0345920001467069
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,12,1,128,1,float16,fp8,0,0.04193066557248434
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,12,2,128,1,float16,float16,0,0.03972800076007843
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,12,2,128,1,float16,fp8,0,0.04775466521581014
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,12,4,128,1,float16,float16,0,0.05821333328882853
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,12,4,128,1,float16,fp8,0,0.06713066498438518
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,12,2,128,1,float16,float16,0,0.02610666553179423
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,12,12,128,1,float16,float16,0,0.03728000074625015
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,12,12,128,1,float16,fp8,0,0.043951998154322304
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,12,4,128,1,float16,fp8,0,0.03926933308442434
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,12,1,128,1,float16,float16,0,0.023685333629449207
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,12,1,128,1,float16,fp8,0,0.02834133307139079
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,12,2,128,1,float16,fp8,0,0.03262399882078171
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,12,4,128,1,float16,float16,0,0.03156800071398417
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,12,12,128,1,float16,float16,0,0.02164799968401591
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,12,12,128,1,float16,fp8,0,0.027744000156720478
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,12,1,128,1,float16,float16,0,0.017423999806245167
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,12,1,128,1,float16,fp8,0,0.020351999749739964
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,12,2,128,1,float16,float16,0,0.018432000031073887
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,12,12,128,1,float16,fp8,0,0.02399466683467229
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,12,2,128,1,float16,fp8,0,0.02292799949645996
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,12,4,128,1,float16,float16,0,0.021194666624069214
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,12,4,128,1,float16,fp8,0,0.026389333109060924
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,12,12,128,1,float16,float16,0,0.018383999665578205
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,12,1,128,1,float16,float16,0,0.016693333784739178
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,12,1,128,1,float16,fp8,0,0.019754666835069656
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,12,2,128,1,float16,float16,0,0.01681600014368693
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,12,12,128,1,float16,fp8,0,0.021759999295075733
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,12,2,128,1,float16,fp8,0,0.021557333568731945
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,12,4,128,1,float16,float16,0,0.018181333939234417
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,12,4,128,1,float16,fp8,0,0.022757334013779957
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,12,4,128,1,float16,float16,0,0.016800000021855038
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,12,12,128,1,float16,float16,0,0.01670933390657107
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,12,1,128,1,float16,float16,0,0.016554666062196095
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,12,1,128,1,float16,fp8,0,0.019173332800467808
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,12,2,128,1,float16,float16,0,0.016341333587964375
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,12,2,128,1,float16,fp8,0,0.02065066620707512
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,12,4,128,1,float16,fp8,0,0.02067733307679494
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,12,1,128,1,float16,float16,0,0.19521600008010864
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,12,1,128,1,float16,fp8,0,0.21818133195241293
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,12,2,128,1,float16,float16,0,0.25601067145665485
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,12,2,128,1,float16,fp8,0,0.2604106664657593
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,12,4,128,1,float16,float16,0,0.3861173391342163
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,12,1,128,1,float16,float16,0,0.10869333148002625
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,12,1,128,1,float16,fp8,0,0.12126400073369344
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,12,4,128,1,float16,fp8,0,0.4373013178507487
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,12,2,128,1,float16,float16,0,0.1402453382809957
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,12,2,128,1,float16,fp8,0,0.14018133282661438
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,12,12,128,1,float16,fp8,0,0.25632532437642414
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,12,12,128,1,float16,float16,0,0.22619199752807617
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,12,4,128,1,float16,float16,0,0.2055306633313497
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,12,4,128,1,float16,fp8,0,0.23837333917617798
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,12,12,128,1,float16,float16,0,0.12108799815177917
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,12,12,128,1,float16,fp8,0,0.13658666610717773
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,12,1,128,1,float16,float16,0,0.06201600035031637
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,12,1,128,1,float16,fp8,0,0.06757333377997081
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,12,2,128,1,float16,float16,0,0.07939200103282928
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,12,4,128,1,float16,float16,0,0.11063999931017558
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,12,4,128,1,float16,fp8,0,0.12710400422414145
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,12,12,128,1,float16,float16,0,0.06730133295059204
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,12,2,128,1,float16,fp8,0,0.07965866724650066
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,12,1,128,1,float16,float16,0,0.030810666580994923
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,12,12,128,1,float16,fp8,0,0.07598400115966797
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,12,1,128,1,float16,fp8,0,0.03605333218971888
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,12,2,128,1,float16,float16,0,0.036176001032193504
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,12,2,128,1,float16,fp8,0,0.03995733211437861
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,12,4,128,1,float16,float16,0,0.05395199855168661
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,12,4,128,1,float16,fp8,0,0.059338668982187905
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,12,12,128,1,float16,float16,0,0.03491200009981791
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,12,12,128,1,float16,fp8,0,0.036687999963760376
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,12,1,128,1,float16,float16,0,0.0222080002228419
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,12,1,128,1,float16,fp8,0,0.02590399980545044
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,12,2,128,1,float16,fp8,0,0.028938665986061096
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,12,2,128,1,float16,float16,0,0.024906667570273083
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,12,4,128,1,float16,float16,0,0.029978667696317036
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,12,4,128,1,float16,fp8,0,0.035375999907652535
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,12,12,128,1,float16,fp8,0,0.023610666394233704
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,12,12,128,1,float16,float16,0,0.02015999952952067
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,12,1,128,1,float16,float16,0,0.015882667154073715
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,12,1,128,1,float16,fp8,0,0.018581333259741466
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,12,2,128,1,float16,float16,0,0.01700266698996226
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,12,2,128,1,float16,fp8,0,0.01979200045267741
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,12,4,128,1,float16,float16,0,0.019466667125622433
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,12,4,128,1,float16,fp8,0,0.023002666731675465
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,12,12,128,1,float16,float16,0,0.016858667135238647
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,12,12,128,1,float16,fp8,0,0.01971199984351794
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,12,1,128,1,float16,float16,0,0.014949332922697067
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,12,1,128,1,float16,fp8,0,0.01740266631046931
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,12,2,128,1,float16,float16,0,0.01571200042963028
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,12,2,128,1,float16,fp8,0,0.01777600000301997
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,12,4,128,1,float16,float16,0,0.016607999801635742
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,12,4,128,1,float16,fp8,0,0.01933866615096728
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,12,12,128,1,float16,float16,0,0.015397333850463232
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,12,12,128,1,float16,fp8,0,0.017632000148296356
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,12,1,128,1,float16,float16,0,0.014666666587193808
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,12,1,128,1,float16,fp8,0,0.01749333366751671
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,12,2,128,1,float16,float16,0,0.014778666198253632
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,12,2,128,1,float16,fp8,0,0.01735466718673706
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,12,12,128,1,float16,fp8,0,0.016229332735141117
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,12,4,128,1,float16,float16,0,0.015173333386580149
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,12,4,128,1,float16,fp8,0,0.017727999637524288
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,12,12,128,1,float16,float16,0,0.013797332843144735
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,12,1,128,1,float16,float16,0,0.014069333672523499
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,12,1,128,1,float16,fp8,0,0.0161013330022494
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,12,2,128,1,float16,float16,0,0.013978666315476099
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,12,2,128,1,float16,fp8,0,0.01703466723362605
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,12,4,128,1,float16,float16,0,0.014282666146755219
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,12,4,128,1,float16,fp8,0,0.016501333564519882
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,12,1,128,1,float16,float16,0,0.10739733775456746
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,12,1,128,1,float16,fp8,0,0.12099732955296834
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,12,2,128,1,float16,float16,0,0.13571199774742126
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,12,2,128,1,float16,fp8,0,0.13987200458844504
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,12,12,128,1,float16,fp8,0,0.13475199540456137
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,12,12,128,1,float16,float16,0,0.12990933656692505
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,12,4,128,1,float16,float16,0,0.2222399910291036
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,12,1,128,1,float16,float16,0,0.06149866680304209
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,12,4,128,1,float16,fp8,0,0.25633599360783893
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,12,1,128,1,float16,fp8,0,0.06795200208822887
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,12,2,128,1,float16,float16,0,0.07909866670767467
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,12,2,128,1,float16,fp8,0,0.07928533355395
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,12,4,128,1,float16,float16,0,0.11745066444079082
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,12,12,128,1,float16,float16,0,0.07231999933719635
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,12,12,128,1,float16,fp8,0,0.07343466579914093
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,12,4,128,1,float16,fp8,0,0.1392159958680471
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,12,1,128,1,float16,float16,0,0.030495998760064442
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,12,1,128,1,float16,fp8,0,0.03610666592915853
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,12,2,128,1,float16,float16,0,0.035775999228159584
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,12,2,128,1,float16,fp8,0,0.04010133445262909
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,12,4,128,1,float16,float16,0,0.059530665477116905
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,12,4,128,1,float16,fp8,0,0.0643039991458257
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,12,12,128,1,float16,float16,0,0.034074666599432625
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,12,12,128,1,float16,fp8,0,0.03383466601371765
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,12,1,128,1,float16,float16,0,0.021877333521842957
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,12,1,128,1,float16,fp8,0,0.025962665677070618
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,12,2,128,1,float16,float16,0,0.02458133300145467
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,12,2,128,1,float16,fp8,0,0.02844800055027008
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,12,4,128,1,float16,float16,0,0.029743999242782593
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,12,4,128,1,float16,fp8,0,0.03566399961709976
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,12,2,128,1,float16,float16,0,0.01691199963291486
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,12,12,128,1,float16,float16,0,0.021536000072956085
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,12,12,128,1,float16,fp8,0,0.022848000129063923
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,12,1,128,1,float16,float16,0,0.015647999942302704
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,12,12,128,1,float16,fp8,0,0.016058667252461117
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,12,1,128,1,float16,fp8,0,0.018458666900793713
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,12,2,128,1,float16,fp8,0,0.019754666835069656
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,12,4,128,1,float16,float16,0,0.019493332753578823
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,12,12,128,1,float16,float16,0,0.015530666957298914
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,12,4,128,1,float16,fp8,0,0.023045333723227184
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,12,1,128,1,float16,float16,0,0.01488000030318896
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,12,1,128,1,float16,fp8,0,0.017562666287024815
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,12,2,128,1,float16,float16,0,0.015274666249752045
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,12,2,128,1,float16,fp8,0,0.018042666216691334
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,12,4,128,1,float16,float16,0,0.01648533344268799
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,12,4,128,1,float16,fp8,0,0.01932799940307935
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,12,12,128,1,float16,float16,0,0.013450667262077332
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,12,12,128,1,float16,fp8,0,0.013882666826248169
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,12,1,128,1,float16,float16,0,0.014901333798964819
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,12,1,128,1,float16,fp8,0,0.017162666966517765
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,12,2,128,1,float16,float16,0,0.014933332800865173
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,12,2,128,1,float16,fp8,0,0.017349333812793095
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,12,4,128,1,float16,float16,0,0.015333333363135656
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,12,4,128,1,float16,fp8,0,0.017349333812793095
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,12,12,128,1,float16,float16,0,0.013013333082199097
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,12,12,128,1,float16,fp8,0,0.01357866699496905
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,12,1,128,1,float16,float16,0,0.013978666315476099
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,12,1,128,1,float16,fp8,0,0.01598400001724561
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,12,2,128,1,float16,float16,0,0.01394133393963178
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,12,2,128,1,float16,fp8,0,0.016410666207472484
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,12,4,128,1,float16,float16,0,0.014368000129858652
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,12,4,128,1,float16,fp8,0,0.01651200031240781
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,12,12,128,1,float16,float16,0,0.011989332735538483
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,12,12,128,1,float16,fp8,0,0.012533333152532578
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,12,1,128,1,float16,float16,0,0.013637332866589228
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,12,1,128,1,float16,fp8,0,0.016016000260909397
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,12,2,128,1,float16,float16,0,0.013850666582584381
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,12,2,128,1,float16,fp8,0,0.01579733317097028
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,12,4,128,1,float16,float16,0,0.014021333307027817
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,12,4,128,1,float16,fp8,0,0.0161920003592968
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,12,1,128,1,float16,float16,0,0.061573331554730736
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,12,1,128,1,float16,fp8,0,0.06690666576226552
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,12,2,128,1,float16,float16,0,0.08730666836102803
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,12,2,128,1,float16,fp8,0,0.09115200241406758
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,12,12,128,1,float16,float16,0,0.09083732962608337
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,12,4,128,1,float16,float16,0,0.11867200334866841
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,12,12,128,1,float16,fp8,0,0.09194133679072063
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,12,1,128,1,float16,float16,0,0.030640001098314922
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,12,4,128,1,float16,fp8,0,0.13914666573206583
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,12,1,128,1,float16,fp8,0,0.035717333356539406
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,12,2,128,1,float16,float16,0,0.04167466859022776
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,12,2,128,1,float16,fp8,0,0.04589866598447164
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,12,4,128,1,float16,float16,0,0.057071998715400696
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,12,12,128,1,float16,float16,0,0.045184001326560974
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,12,4,128,1,float16,fp8,0,0.062362665931383766
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,12,12,128,1,float16,fp8,0,0.04282666742801666
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,12,1,128,1,float16,float16,0,0.02223466585079829
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,12,1,128,1,float16,fp8,0,0.025941332181294758
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,12,2,128,1,float16,float16,0,0.024725332856178284
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,12,2,128,1,float16,fp8,0,0.028794666131337483
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,12,4,128,1,float16,float16,0,0.029706666866938274
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,12,4,128,1,float16,fp8,0,0.035642666121323906
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,12,12,128,1,float16,float16,0,0.025125332176685333
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,12,12,128,1,float16,fp8,0,0.026474667092164356
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,12,1,128,1,float16,float16,0,0.01571200042963028
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,12,1,128,1,float16,fp8,0,0.01791999985774358
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,12,2,128,1,float16,float16,0,0.016762666404247284
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,12,2,128,1,float16,fp8,0,0.0195573332409064
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,12,4,128,1,float16,float16,0,0.01937066639463107
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,12,4,128,1,float16,fp8,0,0.02292799949645996
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,12,12,128,1,float16,float16,0,0.01855466639002164
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,12,12,128,1,float16,fp8,0,0.019365333020687103
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,12,1,128,1,float16,float16,0,0.015210667004187902
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,12,1,128,1,float16,fp8,0,0.017952000101407368
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,12,2,128,1,float16,float16,0,0.01505600040157636
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,12,12,128,1,float16,fp8,0,0.01422400027513504
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,12,2,128,1,float16,fp8,0,0.01791999985774358
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,12,4,128,1,float16,float16,0,0.01623999948302905
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,12,4,128,1,float16,fp8,0,0.01950399950146675
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,12,12,128,1,float16,float16,0,0.013274667163689932
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,12,1,128,1,float16,float16,0,0.014949332922697067
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,12,1,128,1,float16,fp8,0,0.017231999586025875
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,12,12,128,1,float16,float16,0,0.013264000415802002
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,12,2,128,1,float16,float16,0,0.014592000593741735
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,12,2,128,1,float16,fp8,0,0.017642666896184284
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,12,4,128,1,float16,float16,0,0.014581333845853806
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,12,4,128,1,float16,fp8,0,0.01735466718673706
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,12,12,128,1,float16,fp8,0,0.01351999988158544
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,12,1,128,1,float16,float16,0,0.013962666193644205
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,12,1,128,1,float16,fp8,0,0.01659199967980385
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,12,2,128,1,float16,float16,0,0.014111999422311783
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,12,2,128,1,float16,fp8,0,0.01629866659641266
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,12,4,128,1,float16,float16,0,0.013999999811251959
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,12,4,128,1,float16,fp8,0,0.016106666376193363
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,12,12,128,1,float16,float16,0,0.012762666990359625
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,12,12,128,1,float16,fp8,0,0.01331199953953425
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,12,1,128,1,float16,float16,0,0.013818666338920593
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,12,1,128,1,float16,fp8,0,0.01589866727590561
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,12,2,128,1,float16,float16,0,0.013978666315476099
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,12,2,128,1,float16,fp8,0,0.01602666700879733
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,12,4,128,1,float16,float16,0,0.01349866638580958
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,12,4,128,1,float16,fp8,0,0.015717333803574245
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,12,2,128,1,float16,float16,0,0.013653332988421122
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,12,12,128,1,float16,float16,0,0.011887999872366587
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,12,12,128,1,float16,fp8,0,0.012527999778588613
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,12,1,128,1,float16,float16,0,0.013760000467300415
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,12,1,128,1,float16,fp8,0,0.015728000551462173
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,12,2,128,1,float16,fp8,0,0.015743999431530636
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,12,4,128,1,float16,float16,0,0.013290667285521826
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,12,4,128,1,float16,fp8,0,0.015520000209410986
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,12,1,128,1,float16,float16,0,0.035573333501815796
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,12,1,128,1,float16,fp8,0,0.04078399886687597
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,12,2,128,1,float16,float16,0,0.040591999888420105
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,12,2,128,1,float16,fp8,0,0.04632000128428141
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,12,4,128,1,float16,float16,0,0.06495466828346252
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,12,12,128,1,float16,float16,0,0.064560001095136
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,12,4,128,1,float16,fp8,0,0.061610668897628784
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,12,12,128,1,float16,fp8,0,0.06085866689682007
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,12,1,128,1,float16,float16,0,0.022650666534900665
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,12,1,128,1,float16,fp8,0,0.025477332373460133
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,12,2,128,1,float16,float16,0,0.02442666639884313
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,12,2,128,1,float16,fp8,0,0.02899733434120814
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,12,4,128,1,float16,float16,0,0.03306666761636734
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,12,12,128,1,float16,float16,0,0.03469866762558619
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,12,4,128,1,float16,fp8,0,0.03561066587766012
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,12,12,128,1,float16,fp8,0,0.03581333408753077
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,12,1,128,1,float16,float16,0,0.01589866727590561
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,12,1,128,1,float16,fp8,0,0.018101333330074947
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,12,2,128,1,float16,float16,0,0.016773333152135212
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,12,2,128,1,float16,fp8,0,0.019845332950353622
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,12,4,128,1,float16,float16,0,0.021066665649414062
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,12,12,128,1,float16,float16,0,0.02184533327817917
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,12,4,128,1,float16,fp8,0,0.022677332162857056
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,12,12,128,1,float16,fp8,0,0.02293866624434789
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,12,1,128,1,float16,float16,0,0.014981333166360855
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,12,1,128,1,float16,fp8,0,0.01762666677435239
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,12,2,128,1,float16,float16,0,0.015087999403476715
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,12,2,128,1,float16,fp8,0,0.017349333812793095
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,12,4,128,1,float16,float16,0,0.014741333822409311
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,12,4,128,1,float16,fp8,0,0.015829333414634068
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,12,12,128,1,float16,float16,0,0.016741332908471424
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,12,12,128,1,float16,fp8,0,0.01728533332546552
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,12,1,128,1,float16,float16,0,0.014746667196353277
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,12,1,128,1,float16,fp8,0,0.017674667139848072
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,12,2,128,1,float16,float16,0,0.0144213338692983
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,12,12,128,1,float16,float16,0,0.01292266696691513
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,12,2,128,1,float16,fp8,0,0.01740266631046931
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,12,4,128,1,float16,float16,0,0.013568000247081121
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,12,4,128,1,float16,fp8,0,0.013728000223636627
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,12,12,128,1,float16,fp8,0,0.013658666362365087
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,12,1,128,1,float16,float16,0,0.014175999909639359
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,12,1,128,1,float16,fp8,0,0.016389333953460056
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,12,2,128,1,float16,float16,0,0.0136266661187013
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,12,2,128,1,float16,fp8,0,0.015957333147525787
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,12,4,128,1,float16,float16,0,0.01322666679819425
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,12,4,128,1,float16,fp8,0,0.013621332744757334
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,12,12,128,1,float16,float16,0,0.012874666601419449
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,12,12,128,1,float16,fp8,0,0.013738666971524557
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,12,1,128,1,float16,float16,0,0.013717333475748697
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,12,1,128,1,float16,fp8,0,0.015573333948850632
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,12,12,128,1,float16,fp8,0,0.013343999783198038
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,12,2,128,1,float16,float16,0,0.013445333888133367
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,12,2,128,1,float16,fp8,0,0.01578666642308235
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,12,4,128,1,float16,float16,0,0.012117333710193634
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,12,12,128,1,float16,float16,0,0.01267733300725619
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,12,4,128,1,float16,fp8,0,0.01246400053302447
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,12,1,128,1,float16,float16,0,0.013525333255529404
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,12,1,128,1,float16,fp8,0,0.015583999454975128
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,12,2,128,1,float16,float16,0,0.013327999661366144
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,12,12,128,1,float16,fp8,0,0.012373333175977072
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,12,1,128,1,float16,float16,0,0.013514666507641474
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,12,2,128,1,float16,fp8,0,0.015317333241303762
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,12,4,128,1,float16,float16,0,0.01191466674208641
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,12,4,128,1,float16,fp8,0,0.01228800043463707
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,12,12,128,1,float16,float16,0,0.012122667084137598
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,12,1,128,1,float16,fp8,0,0.015205333630243937
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,12,2,128,1,float16,float16,0,0.013338666409254074
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,12,2,128,1,float16,fp8,0,0.01543466622630755
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,12,4,128,1,float16,float16,0,0.011930666863918304
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,12,4,128,1,float16,fp8,0,0.011941333611806234
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,8,8,128,1,float16,float16,0,4.106399854024251
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,8,8,128,1,float16,fp8,0,4.099205334981282
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,8,1,128,1,float16,float16,0,7.853365580240886
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,8,1,128,1,float16,fp8,0,7.211178461710612
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,8,2,128,1,float16,float16,0,7.6857865651448565
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,8,2,128,1,float16,fp8,0,7.294933319091797
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,8,4,128,1,float16,float16,0,8.445130666097006
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,8,4,128,1,float16,fp8,0,8.007216135660807
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,8,1,128,1,float16,float16,0,3.566725413004557
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,8,1,128,1,float16,fp8,0,3.63426144917806
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,8,2,128,1,float16,float16,0,3.8626505533854165
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,8,2,128,1,float16,fp8,0,3.6856746673583984
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,8,4,128,1,float16,fp8,0,4.086319923400879
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,8,4,128,1,float16,float16,0,4.407285372416179
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,8,8,128,1,float16,float16,0,2.122378667195638
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,8,8,128,1,float16,fp8,0,2.123173395792643
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,8,1,128,1,float16,float16,0,1.8368639945983887
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,8,8,128,1,float16,float16,0,4.408624013264974
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,8,8,128,1,float16,fp8,0,4.054538726806641
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,8,1,128,1,float16,fp8,0,1.8828852971394856
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,8,2,128,1,float16,float16,0,1.9216747283935547
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,8,2,128,1,float16,fp8,0,2.0137866338094077
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,8,4,128,1,float16,float16,0,2.0657386779785156
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,8,4,128,1,float16,fp8,0,2.0868639945983887
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,8,8,128,1,float16,float16,0,2.1203840573628745
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,8,8,128,1,float16,float16,0,1.1055200099945068
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,8,8,128,1,float16,fp8,0,1.1390666961669922
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,8,8,128,1,float16,fp8,0,2.2460907300313315
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,8,1,128,1,float16,float16,0,0.9558400313059489
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,8,1,128,1,float16,fp8,0,1.03492267926534
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,8,2,128,1,float16,float16,0,1.0236586729685466
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,8,2,128,1,float16,fp8,0,1.020906686782837
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,8,8,128,1,float16,float16,0,1.1002506415049236
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,8,4,128,1,float16,float16,0,1.0919360319773357
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,8,4,128,1,float16,fp8,0,1.1194240252176921
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,8,8,128,1,float16,fp8,0,1.1537813345591228
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,8,1,128,1,float16,float16,0,4.054106712341309
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,8,1,128,1,float16,fp8,0,4.159461339314778
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,8,8,128,1,float16,float16,0,2.429194609324137
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,8,8,128,1,float16,fp8,0,2.4953440030415854
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,8,2,128,1,float16,float16,0,4.439546585083008
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,8,2,128,1,float16,fp8,0,4.228261311848958
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,8,4,128,1,float16,float16,0,4.749877293904622
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,8,4,128,1,float16,fp8,0,4.8137868245442705
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,8,1,128,1,float16,float16,0,2.075962702433268
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,8,1,128,1,float16,fp8,0,2.13209597269694
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,8,2,128,1,float16,float16,0,2.188485304514567
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,8,2,128,1,float16,fp8,0,2.1842452685038247
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,8,4,128,1,float16,float16,0,2.425450642903646
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,8,4,128,1,float16,fp8,0,2.4669013023376465
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,8,8,128,1,float16,float16,0,1.2794026533762615
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,8,8,128,1,float16,float16,0,2.463850657145182
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,8,1,128,1,float16,float16,0,1.0385653177897136
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,8,8,128,1,float16,fp8,0,1.3192319869995117
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,8,8,128,1,float16,fp8,0,2.532591978708903
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,8,1,128,1,float16,fp8,0,1.1973386605580647
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,8,2,128,1,float16,float16,0,1.1349706649780273
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,8,2,128,1,float16,fp8,0,1.1455573240915935
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,8,4,128,1,float16,float16,0,1.2519946893056233
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,8,4,128,1,float16,fp8,0,1.3048426310221355
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,8,8,128,1,float16,float16,0,1.2716106573740642
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,8,8,128,1,float16,float16,0,0.6895413398742676
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,8,8,128,1,float16,fp8,0,1.3182933330535889
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,8,8,128,1,float16,fp8,0,0.7143733501434326
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,8,1,128,1,float16,float16,0,0.5662346680959066
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,8,1,128,1,float16,fp8,0,0.6089226802190145
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,8,2,128,1,float16,float16,0,0.6039520104726156
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,8,4,128,1,float16,fp8,0,0.7074293295542399
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,8,2,128,1,float16,fp8,0,0.6234133243560791
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,8,4,128,1,float16,float16,0,0.6716799736022949
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,8,8,128,1,float16,float16,0,0.6775466601053873
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,8,8,128,1,float16,fp8,0,0.7184800306955973
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,8,1,128,1,float16,float16,0,2.865205446879069
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,8,1,128,1,float16,fp8,0,2.9521280924479165
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,8,2,128,1,float16,float16,0,3.071440060933431
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,8,2,128,1,float16,fp8,0,3.0306453704833984
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,8,8,128,1,float16,float16,0,1.778997262318929
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,8,4,128,1,float16,float16,0,3.4253759384155273
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,8,4,128,1,float16,fp8,0,3.492191950480143
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,8,8,128,1,float16,fp8,0,2.0707680384318032
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,8,1,128,1,float16,float16,0,1.422229290008545
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,8,1,128,1,float16,fp8,0,1.6964960098266602
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,8,2,128,1,float16,float16,0,1.561946709950765
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,8,4,128,1,float16,float16,0,1.7638079325358074
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,8,4,128,1,float16,fp8,0,1.8962027231852214
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,8,8,128,1,float16,float16,0,1.7758506139119465
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,8,8,128,1,float16,float16,0,0.9197386900583903
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,8,2,128,1,float16,fp8,0,1.5536266962687175
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,8,8,128,1,float16,fp8,0,0.9796160062154134
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,8,8,128,1,float16,fp8,0,1.8678080240885417
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,8,1,128,1,float16,float16,0,0.7703946431477865
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,8,1,128,1,float16,fp8,0,0.8274186452229818
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,8,2,128,1,float16,float16,0,0.8269386291503906
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,8,2,128,1,float16,fp8,0,0.8210506439208984
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,8,4,128,1,float16,float16,0,0.9038240114847819
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,8,8,128,1,float16,fp8,0,0.5385973453521729
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,8,4,128,1,float16,fp8,0,0.9513440132141113
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,8,8,128,1,float16,float16,0,0.9227733612060547
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,8,8,128,1,float16,float16,0,0.509066661198934
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,8,2,128,1,float16,float16,0,0.4453866481781006
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,8,8,128,1,float16,fp8,0,0.9787519772847494
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,8,1,128,1,float16,float16,0,0.40985600153605145
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,8,1,128,1,float16,fp8,0,0.4357653458913167
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,8,2,128,1,float16,fp8,0,0.4633760054906209
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,8,4,128,1,float16,float16,0,0.49925867716471356
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,8,4,128,1,float16,fp8,0,0.5301706790924072
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,8,8,128,1,float16,float16,0,0.5056266784667969
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,8,8,128,1,float16,fp8,0,0.5391680002212524
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,8,1,128,1,float16,float16,0,3.7833334604899087
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,8,1,128,1,float16,fp8,0,3.8306185404459634
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,8,2,128,1,float16,float16,0,4.139573415120442
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,8,8,128,1,float16,float16,0,2.327557404836019
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,8,2,128,1,float16,fp8,0,3.968581199645996
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,8,8,128,1,float16,fp8,0,2.4455893834431968
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,8,4,128,1,float16,float16,0,4.636266708374023
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,8,1,128,1,float16,float16,0,1.9544213612874348
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,8,4,128,1,float16,fp8,0,4.701589266459147
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,8,1,128,1,float16,fp8,0,1.9577226638793945
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,8,2,128,1,float16,float16,0,2.1116533279418945
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,8,2,128,1,float16,fp8,0,2.03767999013265
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,8,4,128,1,float16,float16,0,2.339189370473226
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,8,8,128,1,float16,float16,0,1.2111252943674724
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,8,4,128,1,float16,fp8,0,2.409663995107015
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,8,1,128,1,float16,float16,0,0.9816693464914957
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,8,8,128,1,float16,fp8,0,1.2606666882832844
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,8,8,128,1,float16,float16,0,2.388330618540446
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,8,8,128,1,float16,fp8,0,2.6342453956604004
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,8,1,128,1,float16,fp8,0,1.0237653255462646
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,8,2,128,1,float16,float16,0,1.0494773387908936
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,8,2,128,1,float16,fp8,0,1.0506666501363118
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,8,4,128,1,float16,float16,0,1.178869326909383
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,8,4,128,1,float16,fp8,0,1.271504004796346
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,8,8,128,1,float16,float16,0,1.2095200220743816
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,8,8,128,1,float16,float16,0,0.6436640024185181
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,8,8,128,1,float16,fp8,0,1.273429314295451
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,8,8,128,1,float16,fp8,0,0.6855146884918213
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,8,1,128,1,float16,float16,0,0.5120853185653687
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,8,1,128,1,float16,fp8,0,0.5469599962234497
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,8,2,128,1,float16,float16,0,0.5473546584447225
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,8,2,128,1,float16,fp8,0,0.5668106476465861
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,8,4,128,1,float16,float16,0,0.6310400168100992
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,8,4,128,1,float16,fp8,0,0.6794880231221517
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,8,8,128,1,float16,float16,0,0.649834672609965
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,8,8,128,1,float16,fp8,0,0.6886879603068033
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,8,8,128,1,float16,float16,0,0.3587520122528076
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,8,8,128,1,float16,fp8,0,0.38657065232594806
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,8,1,128,1,float16,float16,0,0.27507734298706055
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,8,1,128,1,float16,fp8,0,0.3030346632003784
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,8,2,128,1,float16,float16,0,0.31089067459106445
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,8,2,128,1,float16,fp8,0,0.3174239993095398
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,8,4,128,1,float16,float16,0,0.3545600175857544
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,8,4,128,1,float16,fp8,0,0.3744746843973796
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,8,8,128,1,float16,float16,0,0.3577706813812256
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,8,8,128,1,float16,fp8,0,0.3873759905497233
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,8,1,128,1,float16,float16,0,2.159781297047933
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,8,1,128,1,float16,fp8,0,2.245903968811035
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,8,2,128,1,float16,float16,0,2.349173386891683
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,8,2,128,1,float16,fp8,0,2.377072016398112
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,8,8,128,1,float16,float16,0,1.4393119812011719
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,8,4,128,1,float16,float16,0,2.8337599436442056
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,8,1,128,1,float16,float16,0,1.103648026784261
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,8,4,128,1,float16,fp8,0,2.917695999145508
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,8,8,128,1,float16,fp8,0,1.5849067370096843
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,8,1,128,1,float16,fp8,0,1.1704479853312175
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,8,2,128,1,float16,float16,0,1.229354699452718
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,8,2,128,1,float16,fp8,0,1.2163626352945964
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,8,4,128,1,float16,float16,0,1.4483946164449055
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,8,4,128,1,float16,fp8,0,1.4948320388793945
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,8,8,128,1,float16,float16,0,1.440874735514323
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,8,8,128,1,float16,float16,0,0.7610987027486166
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,8,8,128,1,float16,fp8,0,1.5339520772298176
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,8,8,128,1,float16,fp8,0,0.8094560305277506
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,8,1,128,1,float16,float16,0,0.5628906488418579
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,8,1,128,1,float16,fp8,0,0.6265013217926025
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,8,4,128,1,float16,float16,0,0.7441226641337076
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,8,2,128,1,float16,float16,0,0.6352479855219523
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,8,2,128,1,float16,fp8,0,0.6459360122680664
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,8,8,128,1,float16,float16,0,0.7636853059132894
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,8,4,128,1,float16,fp8,0,0.7941546440124512
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,8,8,128,1,float16,float16,0,0.4097493489583333
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,8,8,128,1,float16,fp8,0,0.7974826494852701
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,8,1,128,1,float16,float16,0,0.3078293402989705
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,8,8,128,1,float16,fp8,0,0.44337066014607746
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,8,1,128,1,float16,fp8,0,0.3314293424288432
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,8,2,128,1,float16,float16,0,0.34849599997202557
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,8,2,128,1,float16,fp8,0,0.3601280053456624
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,8,4,128,1,float16,float16,0,0.40092798074086505
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,8,4,128,1,float16,fp8,0,0.43481600284576416
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,8,8,128,1,float16,float16,0,0.40490134557088214
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,8,1,128,1,float16,fp8,0,0.19096000989278158
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,8,8,128,1,float16,fp8,0,0.4442239999771118
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,8,8,128,1,float16,float16,0,0.23637866973876953
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,8,8,128,1,float16,fp8,0,0.25481067101160687
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,8,1,128,1,float16,float16,0,0.17459199825922647
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,8,2,128,1,float16,float16,0,0.19771732886632284
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,8,2,128,1,float16,fp8,0,0.20483734210332236
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,8,4,128,1,float16,float16,0,0.22799466053644815
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,8,4,128,1,float16,fp8,0,0.239738663037618
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,8,8,128,1,float16,float16,0,0.23715200026830038
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,8,8,128,1,float16,fp8,0,0.2573866645495097
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,8,1,128,1,float16,float16,0,2.0435519218444824
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,8,1,128,1,float16,fp8,0,2.1487733523050943
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,8,2,128,1,float16,float16,0,2.313231945037842
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,8,2,128,1,float16,fp8,0,2.315648078918457
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,8,8,128,1,float16,float16,0,1.500442663828532
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,8,8,128,1,float16,fp8,0,1.5693599383036296
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,8,1,128,1,float16,float16,0,1.0421706835428874
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,8,4,128,1,float16,float16,0,2.8715893427530923
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,8,4,128,1,float16,fp8,0,3.015904108683268
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,8,1,128,1,float16,fp8,0,1.1046773592631023
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,8,2,128,1,float16,float16,0,1.1733012994130452
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,8,2,128,1,float16,fp8,0,1.1837013562520344
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,8,4,128,1,float16,float16,0,1.4679840405782063
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,8,4,128,1,float16,fp8,0,1.5468427340189617
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,8,8,128,1,float16,float16,0,0.7745440006256104
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,8,8,128,1,float16,float16,0,1.488053321838379
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,8,1,128,1,float16,float16,0,0.5350133180618286
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,8,8,128,1,float16,fp8,0,0.8389226595560709
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,8,8,128,1,float16,fp8,0,1.583797295888265
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,8,1,128,1,float16,fp8,0,0.5744479894638062
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,8,2,128,1,float16,float16,0,0.6045120159784952
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,8,2,128,1,float16,fp8,0,0.6205919981002808
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,8,4,128,1,float16,float16,0,0.7607253392537435
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,8,4,128,1,float16,fp8,0,0.8132320245107015
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,8,8,128,1,float16,float16,0,0.7751253445943197
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,8,8,128,1,float16,float16,0,0.40426135063171387
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,8,8,128,1,float16,fp8,0,0.834933360417684
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,8,8,128,1,float16,fp8,0,0.44495999813079834
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,8,1,128,1,float16,float16,0,0.2860746582349141
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,8,1,128,1,float16,fp8,0,0.30872533718744916
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,8,2,128,1,float16,float16,0,0.3314719994862874
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,8,2,128,1,float16,fp8,0,0.33823466300964355
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,8,4,128,1,float16,float16,0,0.40120001633961994
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,8,4,128,1,float16,fp8,0,0.4337013165156047
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,8,8,128,1,float16,float16,0,0.4045706590016683
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,8,8,128,1,float16,fp8,0,0.4394773244857788
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,8,8,128,1,float16,float16,0,0.23059733708699545
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,8,8,128,1,float16,fp8,0,0.2505013346672058
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,8,1,128,1,float16,float16,0,0.16023466984430948
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,8,1,128,1,float16,fp8,0,0.17438934246699014
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,8,2,128,1,float16,float16,0,0.18879467248916626
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,8,2,128,1,float16,fp8,0,0.18644267320632935
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,8,4,128,1,float16,float16,0,0.22340265909830728
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,8,4,128,1,float16,fp8,0,0.2427519957224528
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,8,8,128,1,float16,float16,0,0.23033599058787027
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,8,8,128,1,float16,fp8,0,0.2526080012321472
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,8,8,128,1,float16,float16,0,0.1379680037498474
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,8,8,128,1,float16,fp8,0,0.14677332838376364
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,8,1,128,1,float16,float16,0,0.09380267063776652
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,8,1,128,1,float16,fp8,0,0.10470933715502422
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,8,2,128,1,float16,float16,0,0.1088853379090627
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,8,2,128,1,float16,fp8,0,0.11818666259447734
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,8,8,128,1,float16,fp8,0,0.14482133587201437
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,8,4,128,1,float16,float16,0,0.12541866302490234
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,8,4,128,1,float16,fp8,0,0.13505599896113077
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,8,8,128,1,float16,float16,0,0.13911466797192892
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,8,1,128,1,float16,float16,0,1.2072853247324626
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,8,1,128,1,float16,fp8,0,1.3117600282033284
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,8,2,128,1,float16,float16,0,1.3960800170898438
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,8,2,128,1,float16,fp8,0,1.437594731648763
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,8,8,128,1,float16,float16,0,0.9697386423746744
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,8,4,128,1,float16,float16,0,1.8487359682718914
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,8,8,128,1,float16,fp8,0,1.0340373516082764
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,8,4,128,1,float16,fp8,0,1.9790719350179036
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,8,1,128,1,float16,float16,0,0.6224480072657267
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,8,1,128,1,float16,fp8,0,0.6795039971669515
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,8,2,128,1,float16,float16,0,0.7311786810557047
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,8,2,128,1,float16,fp8,0,0.741317351659139
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,8,4,128,1,float16,float16,0,0.9406879742940267
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,8,4,128,1,float16,fp8,0,1.0219093163808186
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,8,8,128,1,float16,float16,0,0.5085813204447428
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,8,8,128,1,float16,float16,0,0.9764373302459717
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,8,8,128,1,float16,fp8,0,1.0360960165659587
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,8,8,128,1,float16,fp8,0,0.5651893218358358
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,8,1,128,1,float16,float16,0,0.33161065975824994
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,8,1,128,1,float16,fp8,0,0.3617546558380127
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,8,2,128,1,float16,float16,0,0.3842986822128296
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,8,2,128,1,float16,fp8,0,0.3959680000940959
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,8,4,128,1,float16,float16,0,0.49907732009887695
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,8,4,128,1,float16,fp8,0,0.5418773492177328
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,8,8,128,1,float16,float16,0,0.5071893135706583
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,8,8,128,1,float16,fp8,0,0.5653280019760132
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,8,8,128,1,float16,float16,0,0.2737119992574056
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,8,8,128,1,float16,fp8,0,0.30525867144266766
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,8,1,128,1,float16,float16,0,0.18031466007232666
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,8,1,128,1,float16,fp8,0,0.19655466079711914
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,8,4,128,1,float16,fp8,0,0.2959146698315938
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,8,2,128,1,float16,float16,0,0.21642667055130005
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,8,2,128,1,float16,fp8,0,0.2194719910621643
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,8,4,128,1,float16,float16,0,0.2664320071538289
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,8,8,128,1,float16,float16,0,0.27327466011047363
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,8,8,128,1,float16,fp8,0,0.30401066939036053
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,8,8,128,1,float16,float16,0,0.15612266461054483
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,8,8,128,1,float16,fp8,0,0.17505067586898804
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,8,1,128,1,float16,float16,0,0.10533333818117778
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,8,1,128,1,float16,fp8,0,0.1170186698436737
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,8,2,128,1,float16,float16,0,0.11937600374221802
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,8,2,128,1,float16,fp8,0,0.1252906620502472
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,8,4,128,1,float16,float16,0,0.15002133448918661
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,8,4,128,1,float16,fp8,0,0.16116266449292502
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,8,8,128,1,float16,float16,0,0.15609600146611533
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,8,8,128,1,float16,fp8,0,0.1747093399365743
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,8,2,128,1,float16,float16,0,0.07314133147398631
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,8,8,128,1,float16,float16,0,0.0897173285484314
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,8,8,128,1,float16,fp8,0,0.09806933005650838
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,8,1,128,1,float16,float16,0,0.06705066561698914
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,8,1,128,1,float16,fp8,0,0.07657066484292348
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,8,2,128,1,float16,fp8,0,0.07995200157165527
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,8,4,128,1,float16,float16,0,0.08345599969228108
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,8,4,128,1,float16,fp8,0,0.095551997423172
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,8,8,128,1,float16,float16,0,0.08708799878756206
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,8,8,128,1,float16,fp8,0,0.0983786682287852
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,8,1,128,1,float16,float16,0,1.196069320042928
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,8,1,128,1,float16,fp8,0,1.3009333610534668
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,8,2,128,1,float16,float16,0,1.4819199244181316
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,8,2,128,1,float16,fp8,0,1.481162707010905
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,8,8,128,1,float16,float16,0,1.0637599627176921
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,8,4,128,1,float16,float16,0,2.049210707346598
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,8,8,128,1,float16,fp8,0,1.1457706292470295
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,8,1,128,1,float16,float16,0,0.6136746803919474
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,8,4,128,1,float16,fp8,0,2.1942292849222818
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,8,1,128,1,float16,fp8,0,0.6698986689249674
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,8,2,128,1,float16,float16,0,0.7408533096313477
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,8,2,128,1,float16,fp8,0,0.7612000306447347
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,8,4,128,1,float16,fp8,0,1.1354560057322185
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,8,4,128,1,float16,float16,0,1.0341066519419353
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,8,8,128,1,float16,float16,0,0.5502453247706095
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,8,8,128,1,float16,float16,0,1.062000036239624
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,8,8,128,1,float16,fp8,0,0.6151039997736613
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,8,8,128,1,float16,fp8,0,1.1819946765899658
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,8,1,128,1,float16,float16,0,0.32603732744852704
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,8,4,128,1,float16,float16,0,0.5398666858673096
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,8,1,128,1,float16,fp8,0,0.35567466417948407
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,8,2,128,1,float16,float16,0,0.4041973352432251
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,8,8,128,1,float16,float16,0,0.29054399331410724
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,8,2,128,1,float16,fp8,0,0.39928531646728516
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,8,4,128,1,float16,fp8,0,0.5951040188471476
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,8,8,128,1,float16,float16,0,0.5514666636784872
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,8,8,128,1,float16,fp8,0,0.6128640174865723
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,8,8,128,1,float16,fp8,0,0.3222666581471761
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,8,1,128,1,float16,float16,0,0.17707733313242593
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,8,1,128,1,float16,fp8,0,0.19471466541290283
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,8,2,128,1,float16,float16,0,0.22454400857289633
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,8,2,128,1,float16,fp8,0,0.21989333629608154
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,8,4,128,1,float16,float16,0,0.28726400931676227
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,8,4,128,1,float16,fp8,0,0.3185173273086548
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,8,8,128,1,float16,float16,0,0.29050666093826294
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,8,8,128,1,float16,fp8,0,0.32025599479675293
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,8,8,128,1,float16,float16,0,0.16334399580955505
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,8,8,128,1,float16,fp8,0,0.18373332420984903
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,8,1,128,1,float16,float16,0,0.09907199939092
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,8,1,128,1,float16,fp8,0,0.11004799604415894
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,8,2,128,1,float16,float16,0,0.1241386632124583
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,8,2,128,1,float16,fp8,0,0.12260799606641133
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,8,4,128,1,float16,float16,0,0.15775466958681741
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,8,4,128,1,float16,fp8,0,0.17281067371368408
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,8,8,128,1,float16,float16,0,0.1637333333492279
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,8,8,128,1,float16,fp8,0,0.18516800800959268
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,8,8,128,1,float16,float16,0,0.09477333227793376
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,8,8,128,1,float16,fp8,0,0.09806933005650838
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,8,1,128,1,float16,float16,0,0.06182399888833364
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,8,1,128,1,float16,fp8,0,0.0699893335501353
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,8,2,128,1,float16,float16,0,0.07063999772071838
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,8,2,128,1,float16,fp8,0,0.07682666679223378
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,8,4,128,1,float16,float16,0,0.08115733166535695
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,8,4,128,1,float16,fp8,0,0.09257599711418152
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,8,8,128,1,float16,float16,0,0.09343467156092326
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,8,8,128,1,float16,fp8,0,0.10217066605885823
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,8,8,128,1,float16,float16,0,0.04831466575463613
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,8,8,128,1,float16,fp8,0,0.054485330979029335
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,8,1,128,1,float16,float16,0,0.038906666139761605
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,8,1,128,1,float16,fp8,0,0.04384533564249674
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,8,2,128,1,float16,float16,0,0.042362665136655174
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,8,2,128,1,float16,fp8,0,0.047007997830708824
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,8,4,128,1,float16,float16,0,0.047456001242001854
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,8,4,128,1,float16,fp8,0,0.05427733560403188
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,8,8,128,1,float16,float16,0,0.048437332113583885
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,8,8,128,1,float16,fp8,0,0.054458667834599815
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,8,1,128,1,float16,float16,0,0.8484319845835367
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,8,1,128,1,float16,fp8,0,0.8732266426086426
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,8,2,128,1,float16,float16,0,1.0297013123830159
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,8,8,128,1,float16,float16,0,0.7751946449279785
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,8,2,128,1,float16,fp8,0,1.00764266649882
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,8,4,128,1,float16,float16,0,1.4753066698710124
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,8,8,128,1,float16,fp8,0,0.8292640050252279
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,8,1,128,1,float16,float16,0,0.43964266777038574
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,8,4,128,1,float16,fp8,0,1.5616960525512695
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,8,2,128,1,float16,float16,0,0.5375413497289022
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,8,1,128,1,float16,fp8,0,0.4521919886271159
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,8,2,128,1,float16,fp8,0,0.5229333241780599
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,8,4,128,1,float16,fp8,0,0.7967306772867838
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,8,4,128,1,float16,float16,0,0.7527093092600504
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,8,8,128,1,float16,float16,0,0.768058697382609
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,8,8,128,1,float16,fp8,0,0.8198133309682211
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,8,8,128,1,float16,float16,0,0.4079786539077759
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,8,1,128,1,float16,float16,0,0.23439466953277588
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,8,8,128,1,float16,fp8,0,0.4349546829859416
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,8,1,128,1,float16,fp8,0,0.24168533086776733
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,8,4,128,1,float16,float16,0,0.39396798610687256
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,8,2,128,1,float16,float16,0,0.2850186626116435
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,8,2,128,1,float16,fp8,0,0.2773653268814087
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,8,4,128,1,float16,fp8,0,0.4235786596934001
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,8,8,128,1,float16,float16,0,0.40766934553782147
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,8,8,128,1,float16,fp8,0,0.45057066281636554
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,8,8,128,1,float16,float16,0,0.21614933013916016
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,8,8,128,1,float16,fp8,0,0.23868266741434732
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,8,1,128,1,float16,float16,0,0.13115732868512472
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,8,1,128,1,float16,fp8,0,0.13397333025932312
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,8,2,128,1,float16,float16,0,0.16153599818547568
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,8,2,128,1,float16,fp8,0,0.15522666772206625
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,8,4,128,1,float16,float16,0,0.21015467246373495
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,8,4,128,1,float16,fp8,0,0.2299519975980123
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,8,8,128,1,float16,float16,0,0.21618133783340454
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,8,8,128,1,float16,fp8,0,0.2392373283704122
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,8,8,128,1,float16,float16,0,0.11899200081825256
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,8,8,128,1,float16,fp8,0,0.13332266608874002
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,8,4,128,1,float16,fp8,0,0.1164533297220866
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,8,1,128,1,float16,float16,0,0.07296533385912578
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,8,1,128,1,float16,fp8,0,0.07678933441638947
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,8,2,128,1,float16,float16,0,0.08557867010434468
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,8,2,128,1,float16,fp8,0,0.08316266536712646
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,8,4,128,1,float16,float16,0,0.11772800485293071
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,8,8,128,1,float16,float16,0,0.11864533027013142
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,8,8,128,1,float16,fp8,0,0.13209066788355509
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,8,8,128,1,float16,float16,0,0.0673280010620753
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,8,8,128,1,float16,fp8,0,0.06941866874694824
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,8,1,128,1,float16,float16,0,0.0460746685663859
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,8,1,128,1,float16,fp8,0,0.04900800188382467
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,8,2,128,1,float16,float16,0,0.0521066685517629
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,8,2,128,1,float16,fp8,0,0.05435200035572052
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,8,4,128,1,float16,float16,0,0.06089599927266439
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,8,4,128,1,float16,fp8,0,0.06754666566848755
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,8,8,128,1,float16,float16,0,0.06790400048096974
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,8,8,128,1,float16,fp8,0,0.06909866631031036
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,8,8,128,1,float16,float16,0,0.03976533313592275
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,8,8,128,1,float16,fp8,0,0.04470933477083842
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,8,1,128,1,float16,float16,0,0.032357332607110344
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,8,1,128,1,float16,fp8,0,0.035455999275048576
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,8,2,128,1,float16,float16,0,0.035071998834609985
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,8,2,128,1,float16,fp8,0,0.03724266588687897
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,8,4,128,1,float16,float16,0,0.039221333960692085
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,8,4,128,1,float16,fp8,0,0.04383466641108195
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,8,8,128,1,float16,float16,0,0.039850667119026184
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,8,8,128,1,float16,fp8,0,0.04493333399295807
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,8,1,128,1,float16,float16,0,0.9230666955312093
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,8,1,128,1,float16,fp8,0,0.9862826665242513
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,8,8,128,1,float16,float16,0,0.9256107012430826
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,8,2,128,1,float16,float16,0,1.1745920181274414
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,8,2,128,1,float16,fp8,0,1.1605119705200195
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,8,8,128,1,float16,fp8,0,0.9811253547668457
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,8,1,128,1,float16,float16,0,0.4758400122324626
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,8,2,128,1,float16,float16,0,0.5910346508026123
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,8,1,128,1,float16,fp8,0,0.5074133475621542
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,8,4,128,1,float16,float16,0,1.760543982187907
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,8,2,128,1,float16,fp8,0,0.5938453276952108
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,8,4,128,1,float16,fp8,0,1.9020053545633953
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,8,4,128,1,float16,fp8,0,0.9680426915486654
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,8,4,128,1,float16,float16,0,0.8919626871744791
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,8,8,128,1,float16,float16,0,0.9225653012593588
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,8,8,128,1,float16,fp8,0,1.0231359799702961
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,8,8,128,1,float16,fp8,0,0.5355999867121378
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,8,8,128,1,float16,float16,0,0.4761973222096761
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,8,1,128,1,float16,fp8,0,0.26654932896296185
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,8,2,128,1,float16,fp8,0,0.31513599554697674
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,8,2,128,1,float16,float16,0,0.3279199997584025
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,8,8,128,1,float16,float16,0,0.47549867630004883
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,8,1,128,1,float16,float16,0,0.25149865945180255
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,8,8,128,1,float16,float16,0,0.24637333552042642
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,8,8,128,1,float16,fp8,0,0.5371040105819702
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,8,4,128,1,float16,float16,0,0.45784000555674237
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,8,4,128,1,float16,fp8,0,0.49772266546885174
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,8,8,128,1,float16,fp8,0,0.28270934025446576
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,8,1,128,1,float16,float16,0,0.13848533233006796
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,8,1,128,1,float16,fp8,0,0.14714133739471436
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,8,2,128,1,float16,float16,0,0.17488000790278116
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,8,2,128,1,float16,fp8,0,0.1713599960009257
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,8,4,128,1,float16,float16,0,0.24308266242345175
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,8,4,128,1,float16,fp8,0,0.2686240077018738
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,8,8,128,1,float16,float16,0,0.2486720085144043
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,8,8,128,1,float16,float16,0,0.13507733742396036
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,8,8,128,1,float16,fp8,0,0.2780533234278361
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,8,8,128,1,float16,fp8,0,0.15568000078201294
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,8,1,128,1,float16,float16,0,0.07580266892910004
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,8,1,128,1,float16,fp8,0,0.08172800143559773
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,8,2,128,1,float16,float16,0,0.09959999720255534
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,8,2,128,1,float16,fp8,0,0.0937600036462148
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,8,4,128,1,float16,float16,0,0.13327999909718832
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,8,4,128,1,float16,fp8,0,0.14989333351453146
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,8,8,128,1,float16,float16,0,0.13498666882514954
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,8,8,128,1,float16,fp8,0,0.15732266505559286
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,8,8,128,1,float16,float16,0,0.07669333120187123
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,8,8,128,1,float16,fp8,0,0.08822932839393616
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,8,1,128,1,float16,float16,0,0.046485334634780884
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,8,1,128,1,float16,fp8,0,0.05146666864554087
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,8,2,128,1,float16,float16,0,0.052005335688591
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,8,2,128,1,float16,fp8,0,0.056015998125076294
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,8,4,128,1,float16,float16,0,0.06443200012048085
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,8,4,128,1,float16,fp8,0,0.07254933317502339
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,8,8,128,1,float16,float16,0,0.0767146646976471
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,8,8,128,1,float16,fp8,0,0.08693333466847737
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,8,8,128,1,float16,float16,0,0.03679466744263967
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,8,8,128,1,float16,fp8,0,0.042319998145103455
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,8,1,128,1,float16,float16,0,0.027888000011444092
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,8,1,128,1,float16,fp8,0,0.030554667115211487
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,8,2,128,1,float16,float16,0,0.030917334059874218
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,8,2,128,1,float16,fp8,0,0.034160000582536064
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,8,4,128,1,float16,float16,0,0.0363520011305809
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,8,4,128,1,float16,fp8,0,0.040965333580970764
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,8,8,128,1,float16,float16,0,0.03676799933115641
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,8,8,128,1,float16,fp8,0,0.04238933324813843
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,8,8,128,1,float16,float16,0,0.03017599880695343
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,8,8,128,1,float16,fp8,0,0.035349334279696144
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,8,1,128,1,float16,float16,0,0.02498133232196172
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,8,1,128,1,float16,fp8,0,0.028016000986099243
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,8,8,128,1,float16,float16,0,0.03029866764942805
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,8,2,128,1,float16,float16,0,0.02749866743882497
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,8,2,128,1,float16,fp8,0,0.030346666773160298
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,8,4,128,1,float16,float16,0,0.030063999195893604
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,8,4,128,1,float16,fp8,0,0.03362133353948593
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,8,8,128,1,float16,fp8,0,0.035173334181308746
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,8,1,128,1,float16,float16,0,0.6127786636352539
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,8,1,128,1,float16,fp8,0,0.7004746596018473
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,8,1,128,1,float16,float16,0,0.31830400228500366
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,8,2,128,1,float16,float16,0,0.8170186678568522
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,8,2,128,1,float16,fp8,0,0.8885653018951416
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,8,1,128,1,float16,fp8,0,0.36351998647054035
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,8,2,128,1,float16,float16,0,0.4379946788152059
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,8,8,128,1,float16,fp8,0,0.9163040320078532
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,8,8,128,1,float16,float16,0,0.7627200285593668
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,8,2,128,1,float16,fp8,0,0.45343999067942303
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,8,4,128,1,float16,float16,0,1.450474739074707
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,8,4,128,1,float16,fp8,0,1.6315466562906902
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,8,4,128,1,float16,float16,0,0.7375413576761881
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,8,4,128,1,float16,fp8,0,0.8470453421274821
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,8,8,128,1,float16,fp8,0,0.4598879814147949
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,8,8,128,1,float16,float16,0,0.38042132059733075
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,8,8,128,1,float16,fp8,0,0.8990186850229899
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,8,8,128,1,float16,float16,0,0.7373706499735514
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,8,1,128,1,float16,float16,0,0.17138133446375528
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,8,2,128,1,float16,float16,0,0.24750399589538574
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,8,4,128,1,float16,float16,0,0.3790293137232463
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,8,8,128,1,float16,float16,0,0.20537600914637247
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,8,4,128,1,float16,fp8,0,0.42610132694244385
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,8,8,128,1,float16,fp8,0,0.25362666447957355
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,8,2,128,1,float16,fp8,0,0.24209066232045492
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,8,8,128,1,float16,float16,0,0.38438932100931805
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,8,1,128,1,float16,fp8,0,0.10762133200963338
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,8,1,128,1,float16,float16,0,0.0940106709798177
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,8,2,128,1,float16,float16,0,0.12941333651542664
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,8,2,128,1,float16,fp8,0,0.1328426698843638
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,8,1,128,1,float16,fp8,0,0.19543999433517456
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,8,8,128,1,float16,fp8,0,0.45557332038879395
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,8,4,128,1,float16,fp8,0,0.23351999123891196
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,8,4,128,1,float16,float16,0,0.19519466161727905
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,8,8,128,1,float16,float16,0,0.20265599091847739
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,8,8,128,1,float16,fp8,0,0.25301865736643475
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,8,8,128,1,float16,float16,0,0.11379733681678772
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,8,8,128,1,float16,fp8,0,0.14241600036621094
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,8,1,128,1,float16,fp8,0,0.06112533311049143
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,8,1,128,1,float16,float16,0,0.053039997816085815
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,8,2,128,1,float16,float16,0,0.07509333391984303
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,8,2,128,1,float16,fp8,0,0.07230933507283528
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,8,4,128,1,float16,float16,0,0.10645332932472229
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,8,8,128,1,float16,float16,0,0.06364800035953522
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,8,4,128,1,float16,fp8,0,0.13030399878819784
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,8,8,128,1,float16,float16,0,0.11294933160146077
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,8,8,128,1,float16,fp8,0,0.14178133010864258
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,8,8,128,1,float16,fp8,0,0.06806933383146922
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,8,1,128,1,float16,float16,0,0.03329066683848699
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,8,1,128,1,float16,fp8,0,0.03826666623353958
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,8,2,128,1,float16,float16,0,0.03842666745185852
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,8,2,128,1,float16,fp8,0,0.043663998444875084
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,8,4,128,1,float16,float16,0,0.049413333336512245
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,8,4,128,1,float16,fp8,0,0.06238399942715963
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,8,8,128,1,float16,float16,0,0.0633653352657954
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,8,8,128,1,float16,fp8,0,0.0699893335501353
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,8,8,128,1,float16,float16,0,0.02992533395687739
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,8,2,128,1,float16,float16,0,0.024080000817775726
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,8,8,128,1,float16,fp8,0,0.0374293327331543
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,8,1,128,1,float16,float16,0,0.021573332448800404
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,8,1,128,1,float16,fp8,0,0.025418666501839954
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,8,2,128,1,float16,fp8,0,0.028853334486484528
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,8,4,128,1,float16,float16,0,0.02900800108909607
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,8,4,128,1,float16,fp8,0,0.037087999284267426
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,8,8,128,1,float16,float16,0,0.029552000264326733
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,8,8,128,1,float16,fp8,0,0.03755733370780945
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,8,8,128,1,float16,float16,0,0.023919999599456787
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,8,8,128,1,float16,fp8,0,0.030271999537944794
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,8,1,128,1,float16,float16,0,0.019797333826621372
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,8,1,128,1,float16,fp8,0,0.02362666775782903
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,8,2,128,1,float16,float16,0,0.021066665649414062
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,8,2,128,1,float16,fp8,0,0.02553066611289978
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,8,4,128,1,float16,float16,0,0.023237332701683044
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,8,4,128,1,float16,fp8,0,0.029882666965325672
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,8,8,128,1,float16,float16,0,0.023941333095232647
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,8,8,128,1,float16,fp8,0,0.030405332644780476
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,8,8,128,1,float16,float16,0,0.020960000654061634
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,8,8,128,1,float16,fp8,0,0.026485333840052288
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,8,4,128,1,float16,float16,0,0.020746666938066483
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,8,1,128,1,float16,float16,0,0.019551999866962433
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,8,1,128,1,float16,fp8,0,0.022517333428064983
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,8,2,128,1,float16,float16,0,0.019546666493018467
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,8,2,128,1,float16,fp8,0,0.023546665906906128
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,8,4,128,1,float16,fp8,0,0.0264533335963885
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,8,8,128,1,float16,float16,0,0.021007999777793884
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,8,8,128,1,float16,fp8,0,0.026762666801611584
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,8,1,128,1,float16,float16,0,0.27079999446868896
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,8,1,128,1,float16,fp8,0,0.31414933999379474
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,8,2,128,1,float16,float16,0,0.38470399379730225
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,8,2,128,1,float16,fp8,0,0.40509335199991864
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,8,1,128,1,float16,float16,0,0.14482667048772177
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,8,8,128,1,float16,float16,0,0.35917333761850995
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,8,1,128,1,float16,fp8,0,0.1685333251953125
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,8,2,128,1,float16,float16,0,0.22203733523686728
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,8,2,128,1,float16,fp8,0,0.216922660668691
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,8,8,128,1,float16,fp8,0,0.4102773269017537
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,8,4,128,1,float16,float16,0,0.6890666484832764
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,8,4,128,1,float16,fp8,0,0.7932533423105875
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,8,4,128,1,float16,float16,0,0.3476373354593913
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,8,4,128,1,float16,fp8,0,0.40915199120839435
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,8,8,128,1,float16,float16,0,0.19215999046961466
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,8,8,128,1,float16,float16,0,0.3580373525619507
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,8,8,128,1,float16,fp8,0,0.22961066166559854
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,8,8,128,1,float16,fp8,0,0.42025065422058105
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,8,1,128,1,float16,float16,0,0.08106666803359985
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,8,2,128,1,float16,float16,0,0.11704533298810323
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,8,2,128,1,float16,fp8,0,0.12172266840934753
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,8,4,128,1,float16,float16,0,0.18222399552663168
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,8,1,128,1,float16,fp8,0,0.09553066889444987
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,8,4,128,1,float16,fp8,0,0.21873066822687784
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,8,8,128,1,float16,float16,0,0.19246933857599893
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,8,8,128,1,float16,float16,0,0.10609599947929382
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,8,8,128,1,float16,fp8,0,0.2355253299077352
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,8,8,128,1,float16,fp8,0,0.1297546625137329
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,8,1,128,1,float16,float16,0,0.04417600234349569
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,8,1,128,1,float16,fp8,0,0.05136533578236898
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,8,2,128,1,float16,float16,0,0.06638933221499126
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,8,8,128,1,float16,fp8,0,0.13055466612180075
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,8,2,128,1,float16,fp8,0,0.0649599979321162
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,8,4,128,1,float16,float16,0,0.10078932841618855
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,8,4,128,1,float16,fp8,0,0.12408533692359924
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,8,8,128,1,float16,float16,0,0.10598933696746826
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,8,8,128,1,float16,float16,0,0.05710400144259135
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,8,8,128,1,float16,fp8,0,0.06293333570162456
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,8,1,128,1,float16,float16,0,0.027674667537212372
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,8,1,128,1,float16,fp8,0,0.03292799989382426
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,8,2,128,1,float16,float16,0,0.03292799989382426
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,8,2,128,1,float16,fp8,0,0.038373333712418876
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,8,4,128,1,float16,float16,0,0.04414399961630503
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,8,4,128,1,float16,fp8,0,0.05598933498064677
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,8,8,128,1,float16,float16,0,0.05755199988683065
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,8,8,128,1,float16,fp8,0,0.06592000027497609
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,8,8,128,1,float16,float16,0,0.026863999664783478
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,8,8,128,1,float16,fp8,0,0.03453333427508672
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,8,1,128,1,float16,float16,0,0.01855466639002164
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,8,1,128,1,float16,fp8,0,0.021754667162895203
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,8,2,128,1,float16,float16,0,0.021221332252025604
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,8,2,128,1,float16,fp8,0,0.02496533344189326
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,8,4,128,1,float16,float16,0,0.026421333352724712
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,8,4,128,1,float16,fp8,0,0.03297066688537598
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,8,8,128,1,float16,float16,0,0.026901334524154663
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,8,1,128,1,float16,float16,0,0.0169813334941864
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,8,8,128,1,float16,fp8,0,0.0345920001467069
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,8,8,128,1,float16,float16,0,0.02096533278624217
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,8,8,128,1,float16,fp8,0,0.02743999908367793
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,8,1,128,1,float16,fp8,0,0.020053333292404812
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,8,2,128,1,float16,float16,0,0.018373332917690277
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,8,2,128,1,float16,fp8,0,0.02149333308140437
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,8,4,128,1,float16,float16,0,0.020746666938066483
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,8,4,128,1,float16,fp8,0,0.025781333446502686
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,8,8,128,1,float16,float16,0,0.020949333906173706
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,8,8,128,1,float16,fp8,0,0.027295999228954315
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,8,8,128,1,float16,float16,0,0.018309333672126133
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,8,8,128,1,float16,fp8,0,0.023370665808518726
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,8,1,128,1,float16,float16,0,0.01682666689157486
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,8,1,128,1,float16,fp8,0,0.019226666539907455
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,8,2,128,1,float16,float16,0,0.017184000462293625
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,8,2,128,1,float16,fp8,0,0.019610666980346043
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,8,4,128,1,float16,float16,0,0.018079999834299088
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,8,4,128,1,float16,fp8,0,0.022517333428064983
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,8,8,128,1,float16,float16,0,0.018197332819302876
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,8,8,128,1,float16,fp8,0,0.02372266600529353
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,8,8,128,1,float16,float16,0,0.015856000284353893
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,8,8,128,1,float16,fp8,0,0.01958400011062622
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,8,1,128,1,float16,float16,0,0.0161920003592968
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,8,1,128,1,float16,fp8,0,0.01815466706951459
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,8,2,128,1,float16,float16,0,0.015578666081031164
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,8,2,128,1,float16,fp8,0,0.018298666924238205
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,8,4,128,1,float16,fp8,0,0.019498666127522785
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,8,4,128,1,float16,float16,0,0.01590399940808614
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,8,8,128,1,float16,float16,0,0.01591466615597407
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,8,8,128,1,float16,fp8,0,0.021503999829292297
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,8,1,128,1,float16,float16,0,0.13381866614023843
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,8,1,128,1,float16,fp8,0,0.1481066644191742
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,8,2,128,1,float16,float16,0,0.21118932962417603
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,8,2,128,1,float16,fp8,0,0.20521066586176553
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,8,8,128,1,float16,float16,0,0.1867199937502543
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,8,4,128,1,float16,float16,0,0.335098663965861
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,8,1,128,1,float16,float16,0,0.07372800012429555
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,8,8,128,1,float16,fp8,0,0.2167359987894694
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,8,4,128,1,float16,fp8,0,0.3823466698328654
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,8,1,128,1,float16,fp8,0,0.08391466736793518
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,8,2,128,1,float16,float16,0,0.11334400375684102
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,8,2,128,1,float16,fp8,0,0.11122133334477742
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,8,4,128,1,float16,float16,0,0.17871467272440592
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,8,8,128,1,float16,fp8,0,0.21309866507848105
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,8,4,128,1,float16,fp8,0,0.20570133129755655
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,8,8,128,1,float16,float16,0,0.10264000296592712
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,8,8,128,1,float16,fp8,0,0.11795199910799663
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,8,8,128,1,float16,float16,0,0.18733332554499307
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,8,1,128,1,float16,float16,0,0.039173332353432976
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,8,1,128,1,float16,fp8,0,0.04515199859937032
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,8,2,128,1,float16,float16,0,0.06457066535949707
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,8,2,128,1,float16,fp8,0,0.05770133435726166
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,8,4,128,1,float16,float16,0,0.09706133604049683
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,8,4,128,1,float16,fp8,0,0.11190932989120483
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,8,8,128,1,float16,float16,0,0.10240000486373901
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,8,8,128,1,float16,fp8,0,0.12025066216786702
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,8,8,128,1,float16,float16,0,0.05444266895453135
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,8,8,128,1,float16,fp8,0,0.05643199880917867
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,8,1,128,1,float16,float16,0,0.024933333198229473
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,8,1,128,1,float16,fp8,0,0.028570666909217834
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,8,2,128,1,float16,float16,0,0.03002133220434189
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,8,2,128,1,float16,fp8,0,0.0337119996547699
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,8,4,128,1,float16,float16,0,0.04168533285458883
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,8,4,128,1,float16,fp8,0,0.04877866804599762
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,8,8,128,1,float16,float16,0,0.0553706685702006
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,8,8,128,1,float16,fp8,0,0.0554613322019577
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,8,8,128,1,float16,float16,0,0.02516799916823705
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,8,8,128,1,float16,fp8,0,0.03012266755104065
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,8,1,128,1,float16,float16,0,0.01730666682124138
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,8,1,128,1,float16,fp8,0,0.019685332973798115
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,8,2,128,1,float16,float16,0,0.01964266722400983
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,8,2,128,1,float16,fp8,0,0.023077333966890972
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,8,4,128,1,float16,float16,0,0.02499733368555705
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,8,4,128,1,float16,fp8,0,0.029674666623274486
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,8,8,128,1,float16,float16,0,0.025274666647116344
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,8,8,128,1,float16,fp8,0,0.030207999050617218
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,8,8,128,1,float16,float16,0,0.0194560003777345
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,8,8,128,1,float16,fp8,0,0.022944000860055287
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,8,1,128,1,float16,float16,0,0.01545599972208341
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,8,1,128,1,float16,fp8,0,0.01800000046690305
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,8,2,128,1,float16,float16,0,0.016805333395799
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,8,2,128,1,float16,fp8,0,0.01911466692884763
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,8,4,128,1,float16,float16,0,0.019253333409627277
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,8,4,128,1,float16,fp8,0,0.023071999351183575
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,8,8,128,1,float16,float16,0,0.019541333119074505
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,8,8,128,1,float16,fp8,0,0.022517333428064983
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,8,8,128,1,float16,float16,0,0.016677333662907284
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,8,8,128,1,float16,fp8,0,0.01926400015751521
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,8,1,128,1,float16,float16,0,0.014991999914248785
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,8,1,128,1,float16,fp8,0,0.01706133286158244
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,8,2,128,1,float16,float16,0,0.015290666371583939
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,8,8,128,1,float16,float16,0,0.01657066618402799
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,8,2,128,1,float16,fp8,0,0.017653333644072216
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,8,4,128,1,float16,float16,0,0.016458666572968166
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,8,4,128,1,float16,fp8,0,0.019434666881958645
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,8,8,128,1,float16,fp8,0,0.019487999379634857
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,8,8,128,1,float16,float16,0,0.01423466702302297
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,8,8,128,1,float16,fp8,0,0.016613333175579708
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,8,2,128,1,float16,fp8,0,0.01635733370979627
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,8,1,128,1,float16,float16,0,0.014106666048367819
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,8,1,128,1,float16,fp8,0,0.016085332880417507
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,8,2,128,1,float16,float16,0,0.01443733274936676
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,8,4,128,1,float16,float16,0,0.014570667097965876
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,8,4,128,1,float16,fp8,0,0.016688000410795212
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,8,8,128,1,float16,float16,0,0.014426667243242264
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,8,8,128,1,float16,fp8,0,0.016554666062196095
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,8,8,128,1,float16,float16,0,0.013728000223636627
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,8,8,128,1,float16,fp8,0,0.016021333634853363
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,8,1,128,1,float16,float16,0,0.013861333330472311
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,8,1,128,1,float16,fp8,0,0.015957333147525787
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,8,2,128,1,float16,float16,0,0.014058666924635569
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,8,2,128,1,float16,fp8,0,0.016154666741689045
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,8,4,128,1,float16,float16,0,0.014042666802803675
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,8,4,128,1,float16,fp8,0,0.01621333385507266
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,8,8,128,1,float16,float16,0,0.013850666582584381
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,8,8,128,1,float16,fp8,0,0.015765332927306492
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,8,1,128,1,float16,float16,0,0.07285333176453908
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,8,1,128,1,float16,fp8,0,0.08400000135103862
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,8,2,128,1,float16,float16,0,0.11275733510653178
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,8,2,128,1,float16,fp8,0,0.11145066221555074
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,8,8,128,1,float16,float16,0,0.1079306701819102
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,8,4,128,1,float16,float16,0,0.1781546672185262
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,8,1,128,1,float16,float16,0,0.039264000952243805
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,8,4,128,1,float16,fp8,0,0.20475733280181885
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,8,8,128,1,float16,fp8,0,0.11343466242154439
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,8,1,128,1,float16,fp8,0,0.045253331462542214
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,8,2,128,1,float16,float16,0,0.06333866715431213
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,8,2,128,1,float16,fp8,0,0.05690666536490122
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,8,4,128,1,float16,float16,0,0.0957493285338084
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,8,4,128,1,float16,fp8,0,0.11026666561762492
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,8,8,128,1,float16,float16,0,0.10919466614723206
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,8,8,128,1,float16,fp8,0,0.05061866839726766
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,8,8,128,1,float16,fp8,0,0.11611732840538025
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,8,8,128,1,float16,float16,0,0.05779199798901876
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,8,1,128,1,float16,float16,0,0.024613333245118458
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,8,1,128,1,float16,fp8,0,0.028815999627113342
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,8,2,128,1,float16,float16,0,0.029829333225886028
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,8,2,128,1,float16,fp8,0,0.03389866650104523
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,8,4,128,1,float16,float16,0,0.04142399877309799
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,8,4,128,1,float16,fp8,0,0.04906133313973745
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,8,8,128,1,float16,float16,0,0.05852800110975901
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,8,8,128,1,float16,fp8,0,0.05146666864554087
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,8,8,128,1,float16,float16,0,0.02665599932273229
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,8,8,128,1,float16,fp8,0,0.02959999938805898
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,8,1,128,1,float16,float16,0,0.016997333616018295
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,8,1,128,1,float16,fp8,0,0.01974933346112569
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,8,2,128,1,float16,float16,0,0.019626667102177937
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,8,2,128,1,float16,fp8,0,0.022895999252796173
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,8,4,128,1,float16,float16,0,0.02462933212518692
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,8,4,128,1,float16,fp8,0,0.029338667790095013
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,8,8,128,1,float16,float16,0,0.026629333694775898
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,8,8,128,1,float16,fp8,0,0.02959999938805898
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,8,8,128,1,float16,float16,0,0.017808000246683758
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,8,8,128,1,float16,fp8,0,0.019386666516462963
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,8,1,128,1,float16,float16,0,0.015311999867359797
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,8,1,128,1,float16,fp8,0,0.01785600061217944
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,8,2,128,1,float16,float16,0,0.016613333175579708
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,8,2,128,1,float16,fp8,0,0.019146667172511418
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,8,4,128,1,float16,float16,0,0.019071999937295914
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,8,8,128,1,float16,fp8,0,0.015775999675194424
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,8,4,128,1,float16,fp8,0,0.02292799949645996
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,8,8,128,1,float16,float16,0,0.017743999759356182
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,8,8,128,1,float16,fp8,0,0.01930133377512296
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,8,8,128,1,float16,float16,0,0.014896000425020853
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,8,1,128,1,float16,float16,0,0.014864000181357065
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,8,1,128,1,float16,fp8,0,0.017360000560681026
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,8,2,128,1,float16,float16,0,0.015173333386580149
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,8,2,128,1,float16,fp8,0,0.017637333522240322
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,8,4,128,1,float16,float16,0,0.01624533285697301
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,8,4,128,1,float16,fp8,0,0.018895999838908512
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,8,8,128,1,float16,float16,0,0.014864000181357065
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,8,1,128,1,float16,fp8,0,0.016074666132529575
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,8,8,128,1,float16,fp8,0,0.015802666544914246
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,8,8,128,1,float16,float16,0,0.01312000056107839
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,8,8,128,1,float16,fp8,0,0.013989333063364029
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,8,1,128,1,float16,float16,0,0.014576000471909841
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,8,2,128,1,float16,float16,0,0.014538666854302088
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,8,2,128,1,float16,fp8,0,0.0162773331006368
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,8,4,128,1,float16,float16,0,0.01482133318980535
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,8,4,128,1,float16,fp8,0,0.016650666793187458
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,8,8,128,1,float16,float16,0,0.013557333499193192
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,8,8,128,1,float16,fp8,0,0.01357866699496905
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,8,8,128,1,float16,float16,0,0.01246400053302447
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,8,8,128,1,float16,fp8,0,0.013455999394257864
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,8,1,128,1,float16,float16,0,0.013610667238632837
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,8,1,128,1,float16,fp8,0,0.015754666179418564
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,8,2,128,1,float16,float16,0,0.013978666315476099
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,8,2,128,1,float16,fp8,0,0.01621333385507266
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,8,4,128,1,float16,float16,0,0.013999999811251959
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,8,4,128,1,float16,fp8,0,0.01580799991885821
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,8,8,128,1,float16,float16,0,0.012805332740147909
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,8,8,128,1,float16,fp8,0,0.013125333935022354
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,8,8,128,1,float16,float16,0,0.01184533288081487
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,8,8,128,1,float16,fp8,0,0.01246400053302447
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,8,1,128,1,float16,float16,0,0.013440000514189402
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,8,1,128,1,float16,fp8,0,0.015615999698638916
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,8,2,128,1,float16,float16,0,0.01360000049074491
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,8,2,128,1,float16,fp8,0,0.015919999529918034
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,8,4,128,1,float16,float16,0,0.013605333864688873
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,8,4,128,1,float16,fp8,0,0.01571200042963028
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,8,8,128,1,float16,float16,0,0.011893333246310553
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,8,8,128,1,float16,fp8,0,0.0124746672809124
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,8,1,128,1,float16,float16,0,0.039066667358080544
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,8,1,128,1,float16,fp8,0,0.04484800000985464
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,8,2,128,1,float16,float16,0,0.06186666587988535
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,8,2,128,1,float16,fp8,0,0.05604266623655955
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,8,8,128,1,float16,float16,0,0.07035199801127116
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,8,4,128,1,float16,float16,0,0.10175466537475586
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,8,8,128,1,float16,fp8,0,0.06352533400058746
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,8,4,128,1,float16,fp8,0,0.10628799597422282
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,8,1,128,1,float16,float16,0,0.02476266771554947
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,8,1,128,1,float16,fp8,0,0.02886933336655299
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,8,2,128,1,float16,float16,0,0.03025600065787633
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,8,2,128,1,float16,fp8,0,0.03395200024048487
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,8,4,128,1,float16,float16,0,0.04631466666857401
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,8,8,128,1,float16,float16,0,0.07009066641330719
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,8,4,128,1,float16,fp8,0,0.04833599925041199
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,8,8,128,1,float16,float16,0,0.03319466610749563
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,8,8,128,1,float16,fp8,0,0.06814933319886525
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,8,2,128,1,float16,float16,0,0.01940800001223882
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,8,8,128,1,float16,fp8,0,0.035973332822322845
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,8,1,128,1,float16,fp8,0,0.019871999820073444
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,8,1,128,1,float16,float16,0,0.017173333714405697
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,8,2,128,1,float16,fp8,0,0.022800001005331676
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,8,4,128,1,float16,float16,0,0.02643200010061264
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,8,4,128,1,float16,fp8,0,0.029093332588672638
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,8,8,128,1,float16,float16,0,0.0330826664964358
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,8,8,128,1,float16,fp8,0,0.035743998984495796
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,8,8,128,1,float16,float16,0,0.02125866711139679
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,8,8,128,1,float16,fp8,0,0.022869333624839783
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,8,1,128,1,float16,float16,0,0.015402667224407196
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,8,1,128,1,float16,fp8,0,0.017887999614079792
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,8,2,128,1,float16,float16,0,0.016490666816631954
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,8,2,128,1,float16,fp8,0,0.019237333287795384
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,8,4,128,1,float16,float16,0,0.01759999990463257
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,8,4,128,1,float16,fp8,0,0.018960000326236088
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,8,8,128,1,float16,float16,0,0.021365332106749218
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,8,8,128,1,float16,fp8,0,0.02272533377011617
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,8,8,128,1,float16,float16,0,0.014885333677132925
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,8,8,128,1,float16,fp8,0,0.01603200038274129
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,8,1,128,1,float16,float16,0,0.014698666830857595
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,8,1,128,1,float16,fp8,0,0.01720000058412552
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,8,2,128,1,float16,float16,0,0.014949332922697067
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,8,2,128,1,float16,fp8,0,0.01739199956258138
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,8,4,128,1,float16,float16,0,0.014709333578745524
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,8,4,128,1,float16,fp8,0,0.015615999698638916
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,8,8,128,1,float16,float16,0,0.014869333555301031
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,8,8,128,1,float16,fp8,0,0.015962666521469753
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,8,2,128,1,float16,float16,0,0.014287999520699183
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,8,8,128,1,float16,float16,0,0.013370666652917862
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,8,8,128,1,float16,fp8,0,0.014240000396966934
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,8,4,128,1,float16,fp8,0,0.013514666507641474
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,8,1,128,1,float16,float16,0,0.014389333625634512
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,8,1,128,1,float16,fp8,0,0.01599466676513354
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,8,2,128,1,float16,fp8,0,0.01632533346613248
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,8,4,128,1,float16,float16,0,0.013167999684810638
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,8,8,128,1,float16,float16,0,0.013445333888133367
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,8,8,128,1,float16,fp8,0,0.01403733342885971
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,8,8,128,1,float16,float16,0,0.01309866706530253
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,8,8,128,1,float16,fp8,0,0.013354666531085968
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,8,1,128,1,float16,float16,0,0.013674666484196981
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,8,1,128,1,float16,fp8,0,0.01584533353646596
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,8,2,128,1,float16,float16,0,0.01368533323208491
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,8,2,128,1,float16,fp8,0,0.01607999950647354
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,8,4,128,1,float16,float16,0,0.012432000289360682
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,8,4,128,1,float16,fp8,0,0.012869333227475485
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,8,8,128,1,float16,float16,0,0.013189333180586496
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,8,8,128,1,float16,fp8,0,0.013370666652917862
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,8,8,128,1,float16,float16,0,0.012442667037248611
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,8,8,128,1,float16,fp8,0,0.01251199965675672
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,8,1,128,1,float16,float16,0,0.013408000270525614
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,8,1,128,1,float16,fp8,0,0.015562667200962702
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,8,2,128,1,float16,float16,0,0.013552000125249227
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,8,2,128,1,float16,fp8,0,0.015669333438078564
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,8,4,128,1,float16,float16,0,0.01210133358836174
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,8,4,128,1,float16,fp8,0,0.012304000556468964
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,8,8,128,1,float16,float16,0,0.012586666891972223
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,8,8,128,1,float16,fp8,0,0.012389333297808966
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,8,8,128,1,float16,float16,0,0.01198400060335795
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,8,8,128,1,float16,fp8,0,0.012202666451533636
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,8,1,128,1,float16,float16,0,0.013434667140245438
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,8,1,128,1,float16,fp8,0,0.01540800059835116
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,8,2,128,1,float16,float16,0,0.013386666774749756
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,8,2,128,1,float16,fp8,0,0.015599999576807022
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,8,4,128,1,float16,float16,0,0.011418666690587997
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,8,4,128,1,float16,fp8,0,0.0120319997270902
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,8,8,128,1,float16,float16,0,0.011829332758982977
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,8,8,128,1,float16,fp8,0,0.0122079998254776
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,8,1,128,1,float16,float16,0,0.02459733436505
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,8,1,128,1,float16,fp8,0,0.028757333755493164
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,8,2,128,1,float16,float16,0,0.033045334120591484
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,8,2,128,1,float16,fp8,0,0.0337119996547699
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,8,8,128,1,float16,float16,0,0.04548799991607666
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,8,4,128,1,float16,float16,0,0.0580320010582606
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,8,8,128,1,float16,fp8,0,0.04795733094215393
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,8,4,128,1,float16,fp8,0,0.06106133262316386
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,8,1,128,1,float16,float16,0,0.01687466725707054
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,8,1,128,1,float16,fp8,0,0.019648000597953796
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,8,2,128,1,float16,float16,0,0.021205333371957142
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,8,2,128,1,float16,fp8,0,0.022410665949185688
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,8,4,128,1,float16,float16,0,0.03290133426586787
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,8,4,128,1,float16,fp8,0,0.03565866748491923
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,8,8,128,1,float16,float16,0,0.04641599953174591
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,8,8,128,1,float16,fp8,0,0.04786666731039683
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,8,8,128,1,float16,float16,0,0.0277813325325648
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,8,8,128,1,float16,fp8,0,0.028954667349656422
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,8,1,128,1,float16,float16,0,0.015647999942302704
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,8,1,128,1,float16,fp8,0,0.017279999951521557
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,8,2,128,1,float16,float16,0,0.015002666662136713
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,8,2,128,1,float16,fp8,0,0.015882667154073715
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,8,4,128,1,float16,float16,0,0.021162666380405426
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,8,4,128,1,float16,fp8,0,0.022831998765468597
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,8,8,128,1,float16,float16,0,0.027717334528764088
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,8,8,128,1,float16,fp8,0,0.029114666084448498
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,8,8,128,1,float16,float16,0,0.0182239996890227
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,8,8,128,1,float16,fp8,0,0.019679999599854153
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,8,1,128,1,float16,float16,0,0.014453332871198654
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,8,2,128,1,float16,fp8,0,0.013776000589132309
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,8,1,128,1,float16,fp8,0,0.01700266698996226
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,8,2,128,1,float16,float16,0,0.013429333766301474
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,8,4,128,1,float16,fp8,0,0.01575999955336253
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,8,4,128,1,float16,float16,0,0.015066667149464289
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,8,8,128,1,float16,float16,0,0.018330667167901993
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,8,8,128,1,float16,fp8,0,0.019632000476121902
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,8,8,128,1,float16,float16,0,0.013482666263977686
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,8,8,128,1,float16,fp8,0,0.013845333208640417
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,8,1,128,1,float16,float16,0,0.014202666779359182
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,8,4,128,1,float16,fp8,0,0.014106666048367819
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,8,1,128,1,float16,fp8,0,0.01598400001724561
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,8,2,128,1,float16,float16,0,0.012655999511480331
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,8,2,128,1,float16,fp8,0,0.013343999783198038
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,8,4,128,1,float16,float16,0,0.01302933320403099
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,8,8,128,1,float16,float16,0,0.013295999417702356
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,8,8,128,1,float16,fp8,0,0.013872000078360239
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,8,8,128,1,float16,float16,0,0.013066666821638743
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,8,8,128,1,float16,fp8,0,0.013503999759753546
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,8,1,128,1,float16,float16,0,0.013482666263977686
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,8,1,128,1,float16,fp8,0,0.015696000307798386
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,8,2,128,1,float16,float16,0,0.012250666817029318
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,8,2,128,1,float16,fp8,0,0.012714666624863943
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,8,4,128,1,float16,float16,0,0.012826666235923767
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,8,4,128,1,float16,fp8,0,0.013141332815090815
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,8,8,128,1,float16,float16,0,0.012991999586423239
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,8,8,128,1,float16,fp8,0,0.013776000589132309
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,8,8,128,1,float16,float16,0,0.012815999488035837
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,8,8,128,1,float16,fp8,0,0.01303999995191892
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,8,1,128,1,float16,float16,0,0.01310933381319046
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,8,1,128,1,float16,fp8,0,0.015077333897352219
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,8,2,128,1,float16,float16,0,0.011802667131026586
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,8,2,128,1,float16,fp8,0,0.012256000190973282
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,8,8,128,1,float16,fp8,0,0.012272000312805176
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,8,4,128,1,float16,float16,0,0.012272000312805176
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,8,4,128,1,float16,fp8,0,0.01268799975514412
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,8,8,128,1,float16,float16,0,0.012794667234023413
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,8,8,128,1,float16,fp8,0,0.013082666943470636
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,8,8,128,1,float16,float16,0,0.012080000092585882
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,8,1,128,1,float16,float16,0,0.013162666310866674
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,8,1,128,1,float16,fp8,0,0.015237333873907724
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,8,2,128,1,float16,float16,0,0.011301333705584208
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,8,2,128,1,float16,fp8,0,0.012069333344697952
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,8,4,128,1,float16,float16,0,0.01181866725285848
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,8,8,128,1,float16,fp8,0,0.012096000214417776
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,8,4,128,1,float16,fp8,0,0.012159999459981918
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,8,1,128,1,float16,fp8,0,0.015157333264748255
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,8,8,128,1,float16,float16,0,0.011962667107582092
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,8,8,128,1,float16,fp8,0,0.0124746672809124
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,8,8,128,1,float16,float16,0,0.011781333635250727
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,8,1,128,1,float16,float16,0,0.012768000364303589
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,8,2,128,1,float16,float16,0,0.011658667276302973
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,8,2,128,1,float16,fp8,0,0.01190399999419848
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,8,4,128,1,float16,float16,0,0.011317333827416102
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,8,4,128,1,float16,fp8,0,0.011994666109482447
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,8,8,128,1,float16,float16,0,0.011701333026091257
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,8,8,128,1,float16,fp8,0,0.012293333808581034
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,4,1,128,1,float16,float16,0,3.7008212407430015
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,4,4,128,1,float16,float16,0,2.3147199948628745
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,4,1,128,1,float16,fp8,0,3.6765174865722656
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,4,2,128,1,float16,float16,0,3.8714612325032554
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,4,1,128,1,float16,float16,0,1.902026653289795
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,4,4,128,1,float16,fp8,0,2.3704479535420737
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,4,1,128,1,float16,fp8,0,1.910048007965088
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,4,2,128,1,float16,fp8,0,3.858400026957194
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,4,2,128,1,float16,float16,0,2.0312906901041665
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,4,2,128,1,float16,fp8,0,2.0195679664611816
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,4,4,128,1,float16,float16,0,1.2353653113047283
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,4,4,128,1,float16,fp8,0,1.2598666350046794
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,4,4,128,1,float16,float16,0,2.3106667200724282
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,4,1,128,1,float16,float16,0,1.0000106493632
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,4,1,128,1,float16,fp8,0,1.094533363978068
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,4,4,128,1,float16,fp8,0,2.539957364400228
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,4,2,128,1,float16,float16,0,1.0714346567789714
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,4,2,128,1,float16,fp8,0,1.0702026685078938
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,4,4,128,1,float16,float16,0,1.219802697499593
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,4,4,128,1,float16,float16,0,0.6697546641031901
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,4,4,128,1,float16,fp8,0,1.2819146315256755
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,4,4,128,1,float16,fp8,0,0.7251306374867758
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,4,1,128,1,float16,float16,0,0.5522133509318033
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,4,1,128,1,float16,fp8,0,0.5737333297729492
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,4,2,128,1,float16,float16,0,0.5862346490224203
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,4,2,128,1,float16,fp8,0,0.5882720152537028
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,4,4,128,1,float16,float16,0,0.6684959729512533
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,4,4,128,1,float16,fp8,0,0.7172586917877197
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,4,1,128,1,float16,float16,0,2.14084800084432
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,4,1,128,1,float16,fp8,0,2.17684268951416
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,4,2,128,1,float16,float16,0,2.2970773379007974
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,4,4,128,1,float16,float16,0,1.4425493876139324
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,4,4,128,1,float16,fp8,0,1.4747254053751628
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,4,1,128,1,float16,float16,0,1.1155626773834229
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,4,2,128,1,float16,fp8,0,2.296234607696533
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,4,1,128,1,float16,fp8,0,1.266640027364095
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,4,2,128,1,float16,float16,0,1.201248009999593
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,4,2,128,1,float16,fp8,0,1.2185226281483967
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,4,4,128,1,float16,float16,0,1.444485346476237
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,4,4,128,1,float16,float16,0,0.7613920370737711
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,4,4,128,1,float16,fp8,0,1.474778652191162
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,4,4,128,1,float16,fp8,0,0.8137333393096924
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,4,1,128,1,float16,float16,0,0.6096266508102417
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,4,1,128,1,float16,fp8,0,0.625327984491984
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,4,2,128,1,float16,float16,0,0.6536746819814047
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,4,2,128,1,float16,fp8,0,0.6649173498153687
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,4,4,128,1,float16,float16,0,0.7671519915262858
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,4,4,128,1,float16,float16,0,0.4298773209253947
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,4,4,128,1,float16,fp8,0,0.8107519944508871
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,4,4,128,1,float16,fp8,0,0.46676798661549884
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,4,1,128,1,float16,float16,0,0.3381813367207845
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,4,1,128,1,float16,fp8,0,0.3537973165512085
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,4,2,128,1,float16,float16,0,0.37698133786519367
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,4,2,128,1,float16,fp8,0,0.3831413189570109
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,4,4,128,1,float16,float16,0,0.43164801597595215
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,4,4,128,1,float16,fp8,0,0.462063988049825
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,4,1,128,1,float16,float16,0,1.5306080182393391
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,4,1,128,1,float16,fp8,0,1.5662773450215657
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,4,2,128,1,float16,fp8,0,1.6686773300170898
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,4,2,128,1,float16,float16,0,1.661253293355306
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,4,4,128,1,float16,float16,0,1.05732266108195
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,4,1,128,1,float16,float16,0,0.8090026378631592
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,4,4,128,1,float16,fp8,0,1.1308480103810628
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,4,1,128,1,float16,fp8,0,0.8215093612670898
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,4,2,128,1,float16,float16,0,0.8867466449737549
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,4,2,128,1,float16,fp8,0,0.8879786332448324
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,4,4,128,1,float16,float16,0,1.05294402440389
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,4,4,128,1,float16,float16,0,0.5740480025609335
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,4,4,128,1,float16,fp8,0,1.1298186779022217
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,4,2,128,1,float16,fp8,0,0.49411733945210773
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,4,4,128,1,float16,fp8,0,0.6107199986775717
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,4,1,128,1,float16,float16,0,0.4447253147761027
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,4,1,128,1,float16,fp8,0,0.4572906494140625
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,4,4,128,1,float16,fp8,0,0.35846932729085285
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,4,2,128,1,float16,float16,0,0.48874131838480633
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,4,4,128,1,float16,float16,0,0.568069338798523
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,4,2,128,1,float16,float16,0,0.27722134192784625
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,4,4,128,1,float16,fp8,0,0.6121013164520264
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,4,4,128,1,float16,float16,0,0.33714667956034344
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,4,1,128,1,float16,float16,0,0.2555999954541524
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,4,1,128,1,float16,fp8,0,0.2738186717033386
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,4,2,128,1,float16,fp8,0,0.28279467423756915
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,4,4,128,1,float16,float16,0,0.3332693378130595
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,4,4,128,1,float16,fp8,0,0.35839466253916424
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,4,1,128,1,float16,float16,0,1.9875574111938477
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,4,1,128,1,float16,fp8,0,2.0245973269144693
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,4,4,128,1,float16,float16,0,1.4280212720235188
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,4,2,128,1,float16,float16,0,2.1968372662862143
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,4,2,128,1,float16,fp8,0,2.170037269592285
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,4,1,128,1,float16,float16,0,1.0164053440093994
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,4,4,128,1,float16,fp8,0,1.502197265625
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,4,1,128,1,float16,fp8,0,1.0424586931864421
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,4,2,128,1,float16,float16,0,1.1393760045369465
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,4,2,128,1,float16,fp8,0,1.1374239921569824
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,4,4,128,1,float16,float16,0,1.4277067184448242
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,4,4,128,1,float16,float16,0,0.7529813448588053
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,4,4,128,1,float16,fp8,0,1.517738660176595
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,4,4,128,1,float16,fp8,0,0.8033013343811035
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,4,1,128,1,float16,float16,0,0.541317343711853
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,4,1,128,1,float16,fp8,0,0.5595680077870687
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,4,2,128,1,float16,float16,0,0.6122026840845743
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,4,2,128,1,float16,fp8,0,0.6144213279088339
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,4,4,128,1,float16,float16,0,0.7480800151824951
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,4,4,128,1,float16,fp8,0,0.7966453234354655
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,4,4,128,1,float16,float16,0,0.417466680208842
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,4,4,128,1,float16,fp8,0,0.4504266579945882
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,4,1,128,1,float16,float16,0,0.30082666873931885
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,4,1,128,1,float16,fp8,0,0.3134666681289673
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,4,2,128,1,float16,float16,0,0.3408213456471761
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,4,2,128,1,float16,fp8,0,0.34591468175252277
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,4,4,128,1,float16,float16,0,0.4142293135325114
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,4,4,128,1,float16,fp8,0,0.44841599464416504
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,4,4,128,1,float16,float16,0,0.2453119953473409
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,4,4,128,1,float16,fp8,0,0.2551093300183614
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,4,1,128,1,float16,float16,0,0.1764799952507019
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,4,1,128,1,float16,fp8,0,0.18861865997314453
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,4,2,128,1,float16,float16,0,0.20148799816767374
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,4,2,128,1,float16,fp8,0,0.21029865741729736
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,4,4,128,1,float16,float16,0,0.24503999948501587
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,4,4,128,1,float16,fp8,0,0.2619253396987915
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,4,1,128,1,float16,float16,0,1.182165304819743
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,4,1,128,1,float16,fp8,0,1.211077372233073
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,4,2,128,1,float16,float16,0,1.3311093648274739
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,4,4,128,1,float16,float16,0,0.9102026621500651
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,4,2,128,1,float16,fp8,0,1.3403733571370442
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,4,4,128,1,float16,fp8,0,0.982479969660441
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,4,1,128,1,float16,float16,0,0.6130719979604086
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,4,1,128,1,float16,fp8,0,0.638213316599528
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,4,2,128,1,float16,float16,0,0.7084799607594808
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,4,2,128,1,float16,fp8,0,0.7085813681284586
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,4,4,128,1,float16,float16,0,0.9121599992116293
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,4,4,128,1,float16,float16,0,0.49322664737701416
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,4,1,128,1,float16,fp8,0,0.3555146853129069
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,4,4,128,1,float16,fp8,0,0.977130651473999
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,4,4,128,1,float16,fp8,0,0.5315786600112915
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,4,1,128,1,float16,float16,0,0.33475732803344727
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,4,2,128,1,float16,float16,0,0.3845173517862956
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,4,2,128,1,float16,fp8,0,0.39047467708587646
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,4,4,128,1,float16,float16,0,0.49322132269541424
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,4,4,128,1,float16,fp8,0,0.5349920193354288
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,4,1,128,1,float16,fp8,0,0.20408533016840616
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,4,4,128,1,float16,float16,0,0.27553067604700726
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,4,4,128,1,float16,fp8,0,0.30409600337346393
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,4,1,128,1,float16,float16,0,0.1916159987449646
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,4,2,128,1,float16,float16,0,0.2269173264503479
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,4,2,128,1,float16,fp8,0,0.21785600980122885
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,4,4,128,1,float16,float16,0,0.27720000346501666
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,4,4,128,1,float16,float16,0,0.15531733632087708
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,4,4,128,1,float16,fp8,0,0.30347200234731037
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,4,4,128,1,float16,fp8,0,0.17124267419179282
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,4,1,128,1,float16,float16,0,0.12354666988054912
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,4,1,128,1,float16,fp8,0,0.1312266687552134
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,4,2,128,1,float16,float16,0,0.1313759982585907
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,4,2,128,1,float16,fp8,0,0.13943466544151306
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,4,4,128,1,float16,float16,0,0.15980799992879233
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,4,4,128,1,float16,fp8,0,0.1690773367881775
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,4,1,128,1,float16,float16,0,1.1203146775563557
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,4,1,128,1,float16,fp8,0,1.1799573103586833
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,4,2,128,1,float16,float16,0,1.3512214024861653
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,4,2,128,1,float16,fp8,0,1.3306879997253418
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,4,4,128,1,float16,float16,0,0.9868000348409017
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,4,4,128,1,float16,fp8,0,1.0552159945170085
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,4,1,128,1,float16,float16,0,0.5899946689605713
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,4,1,128,1,float16,fp8,0,0.6134293476740519
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,4,2,128,1,float16,float16,0,0.6893119812011719
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,4,2,128,1,float16,fp8,0,0.6985173225402832
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,4,4,128,1,float16,float16,0,0.9874239762624105
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,4,4,128,1,float16,float16,0,0.5140853325525919
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,4,4,128,1,float16,fp8,0,1.058666706085205
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,4,4,128,1,float16,fp8,0,0.5620160102844238
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,4,1,128,1,float16,float16,0,0.31779734293619794
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,4,1,128,1,float16,fp8,0,0.3327680031458537
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,4,2,128,1,float16,float16,0,0.37798933188120526
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,4,2,128,1,float16,fp8,0,0.37802668412526447
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,4,4,128,1,float16,float16,0,0.5126026471455892
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,4,4,128,1,float16,fp8,0,0.5605493386586508
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,4,4,128,1,float16,float16,0,0.283135990301768
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,4,4,128,1,float16,fp8,0,0.316101332505544
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,4,1,128,1,float16,float16,0,0.18159999450047812
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,4,1,128,1,float16,fp8,0,0.186298668384552
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,4,2,128,1,float16,float16,0,0.21792000532150269
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,4,2,128,1,float16,fp8,0,0.214303990205129
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,4,4,128,1,float16,float16,0,0.2841813365618388
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,4,4,128,1,float16,fp8,0,0.31567466259002686
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,4,4,128,1,float16,float16,0,0.16184000174204508
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,4,4,128,1,float16,fp8,0,0.17462400595347086
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,4,1,128,1,float16,float16,0,0.10776000221570332
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,4,1,128,1,float16,fp8,0,0.11647466818491618
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,4,2,128,1,float16,float16,0,0.12202133735020955
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,4,4,128,1,float16,float16,0,0.1632213294506073
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,4,2,128,1,float16,fp8,0,0.12824533383051553
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,4,4,128,1,float16,float16,0,0.08246933420499165
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,4,4,128,1,float16,fp8,0,0.17574399709701538
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,4,4,128,1,float16,fp8,0,0.09428800145785014
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,4,1,128,1,float16,float16,0,0.06650666892528534
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,4,1,128,1,float16,fp8,0,0.07287999987602234
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,4,2,128,1,float16,fp8,0,0.07982400059700012
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,4,2,128,1,float16,float16,0,0.07146666447321574
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,4,4,128,1,float16,float16,0,0.08297066887219746
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,4,4,128,1,float16,fp8,0,0.0944106678167979
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,4,1,128,1,float16,float16,0,0.7517706553141276
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,4,1,128,1,float16,fp8,0,0.7252053419748942
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,4,2,128,1,float16,fp8,0,0.8464586734771729
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,4,2,128,1,float16,float16,0,0.9224212964375814
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,4,4,128,1,float16,float16,0,0.6971999804178873
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,4,4,128,1,float16,fp8,0,0.7280480066935221
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,4,1,128,1,float16,float16,0,0.3965493440628052
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,4,1,128,1,float16,fp8,0,0.38466668128967285
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,4,2,128,1,float16,float16,0,0.4845386743545532
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,4,2,128,1,float16,fp8,0,0.4479786554972331
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,4,4,128,1,float16,float16,0,0.6895733674367269
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,4,4,128,1,float16,float16,0,0.3688053290049235
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,4,4,128,1,float16,fp8,0,0.7354506651560465
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,4,4,128,1,float16,fp8,0,0.3903733491897583
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,4,1,128,1,float16,float16,0,0.2171893318494161
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,4,1,128,1,float16,fp8,0,0.21611199776331583
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,4,2,128,1,float16,float16,0,0.2675253351529439
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,4,2,128,1,float16,fp8,0,0.24602667490641275
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,4,4,128,1,float16,float16,0,0.36775465806325275
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,4,4,128,1,float16,float16,0,0.20324265956878662
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,4,4,128,1,float16,fp8,0,0.39443198839823407
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,4,4,128,1,float16,fp8,0,0.21774399280548096
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,4,1,128,1,float16,float16,0,0.12381333112716675
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,4,1,128,1,float16,fp8,0,0.11945066849390666
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,4,2,128,1,float16,float16,0,0.15202666322390238
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,4,2,128,1,float16,fp8,0,0.14040000240008035
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,4,4,128,1,float16,float16,0,0.20270399252573648
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,4,4,128,1,float16,fp8,0,0.21651200453440347
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,4,4,128,1,float16,float16,0,0.1111199955145518
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,4,4,128,1,float16,fp8,0,0.1123413344224294
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,4,1,128,1,float16,float16,0,0.07493866483370464
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,4,1,128,1,float16,fp8,0,0.07584000130494435
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,4,2,128,1,float16,float16,0,0.08552533388137817
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,4,2,128,1,float16,fp8,0,0.08773333827654521
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,4,4,128,1,float16,float16,0,0.10958400368690491
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,4,4,128,1,float16,fp8,0,0.11834667126337688
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,4,4,128,1,float16,float16,0,0.06615466872851054
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,4,4,128,1,float16,fp8,0,0.0710399995247523
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,4,1,128,1,float16,float16,0,0.05407999952634176
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,4,1,128,1,float16,fp8,0,0.055215999484062195
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,4,2,128,1,float16,float16,0,0.05796800057093302
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,4,2,128,1,float16,fp8,0,0.05982399980227152
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,4,4,128,1,float16,float16,0,0.0663679987192154
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,4,4,128,1,float16,fp8,0,0.07133333384990692
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,4,1,128,1,float16,float16,0,0.7778879801432291
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,4,1,128,1,float16,fp8,0,0.7687359650929769
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,4,4,128,1,float16,fp8,0,0.856607993443807
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,4,4,128,1,float16,float16,0,0.7920959790547689
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,4,1,128,1,float16,float16,0,0.40644800662994385
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,4,2,128,1,float16,float16,0,0.9883519808451334
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,4,1,128,1,float16,fp8,0,0.4052746693293254
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,4,2,128,1,float16,float16,0,0.5019573370615641
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,4,2,128,1,float16,fp8,0,0.9291679859161377
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,4,2,128,1,float16,fp8,0,0.48123733202616376
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,4,4,128,1,float16,float16,0,0.7979573408762614
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,4,4,128,1,float16,fp8,0,0.8480853239695231
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,4,4,128,1,float16,float16,0,0.41178667545318604
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,4,1,128,1,float16,float16,0,0.22145066658655801
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,4,4,128,1,float16,float16,0,0.4194186528523763
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,4,2,128,1,float16,fp8,0,0.26079465945561725
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,4,4,128,1,float16,fp8,0,0.44731732209523517
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,4,1,128,1,float16,fp8,0,0.21958933273951212
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,4,2,128,1,float16,float16,0,0.2748426596323649
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,4,4,128,1,float16,fp8,0,0.44515732924143475
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,4,4,128,1,float16,float16,0,0.22474133968353271
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,4,1,128,1,float16,float16,0,0.12528000275293985
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,4,4,128,1,float16,fp8,0,0.24570133288701376
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,4,1,128,1,float16,fp8,0,0.116976002852122
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,4,2,128,1,float16,float16,0,0.15481600165367126
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,4,2,128,1,float16,fp8,0,0.14813866217931113
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,4,1,128,1,float16,float16,0,0.07297599812348683
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,4,4,128,1,float16,float16,0,0.22427199284235635
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,4,4,128,1,float16,fp8,0,0.24211200078328451
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,4,4,128,1,float16,float16,0,0.12546133001645407
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,4,4,128,1,float16,fp8,0,0.12186666329701741
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,4,4,128,1,float16,fp8,0,0.12827199697494507
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,4,1,128,1,float16,fp8,0,0.07326399783293407
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,4,2,128,1,float16,float16,0,0.08262933293978374
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,4,2,128,1,float16,fp8,0,0.0846560001373291
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,4,4,128,1,float16,float16,0,0.12398933370908101
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,4,2,128,1,float16,float16,0,0.04853333532810211
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,4,4,128,1,float16,float16,0,0.05931733548641205
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,4,4,128,1,float16,fp8,0,0.06460799773534139
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,4,1,128,1,float16,float16,0,0.04331733286380768
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,4,1,128,1,float16,fp8,0,0.04463466505209605
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,4,2,128,1,float16,fp8,0,0.0496319979429245
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,4,4,128,1,float16,float16,0,0.0591893345117569
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,4,4,128,1,float16,fp8,0,0.06451199948787689
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,4,4,128,1,float16,float16,0,0.04794133206208547
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,4,4,128,1,float16,fp8,0,0.05085866649945577
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,4,1,128,1,float16,float16,0,0.0396373321612676
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,4,1,128,1,float16,fp8,0,0.04083200047413508
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,4,2,128,1,float16,float16,0,0.04248000184694926
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,4,2,128,1,float16,fp8,0,0.04444266855716705
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,4,4,128,1,float16,float16,0,0.04779199759165446
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,4,4,128,1,float16,fp8,0,0.051311999559402466
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,4,1,128,1,float16,float16,0,0.5094186862309774
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,4,1,128,1,float16,fp8,0,0.518122673034668
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,4,4,128,1,float16,float16,0,0.5616000096003214
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,4,1,128,1,float16,float16,0,0.27078400055567425
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,4,2,128,1,float16,float16,0,0.6800906658172607
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,4,2,128,1,float16,fp8,0,0.640496015548706
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,4,4,128,1,float16,fp8,0,0.6267199913660685
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,4,1,128,1,float16,fp8,0,0.2768639922142029
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,4,2,128,1,float16,float16,0,0.3515466849009196
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,4,2,128,1,float16,fp8,0,0.3380800088246663
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,4,4,128,1,float16,float16,0,0.5639839967091879
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,4,4,128,1,float16,fp8,0,0.6240213314692179
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,4,4,128,1,float16,float16,0,0.29572800795237225
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,4,1,128,1,float16,float16,0,0.1492800017197927
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,4,4,128,1,float16,fp8,0,0.33082133531570435
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,4,1,128,1,float16,fp8,0,0.15476266543070474
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,4,2,128,1,float16,float16,0,0.19236799081166586
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,4,2,128,1,float16,fp8,0,0.1859040061632792
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,4,4,128,1,float16,float16,0,0.2961706717809041
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,4,1,128,1,float16,float16,0,0.0823413332303365
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,4,4,128,1,float16,fp8,0,0.32654400666554767
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,4,2,128,1,float16,float16,0,0.1111306647459666
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,4,4,128,1,float16,float16,0,0.15959466497103372
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,4,4,128,1,float16,fp8,0,0.17940266927083334
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,4,1,128,1,float16,fp8,0,0.08345599969228108
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,4,2,128,1,float16,fp8,0,0.10213333368301392
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,4,4,128,1,float16,fp8,0,0.08813866972923279
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,4,1,128,1,float16,float16,0,0.05128533144791921
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,4,4,128,1,float16,float16,0,0.16155733664830527
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,4,2,128,1,float16,float16,0,0.05952000121275584
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,4,4,128,1,float16,fp8,0,0.1809119979540507
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,4,4,128,1,float16,float16,0,0.079434668024381
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,4,1,128,1,float16,fp8,0,0.05402666827042898
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,4,2,128,1,float16,fp8,0,0.06412266691525777
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,4,4,128,1,float16,float16,0,0.082997332016627
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,4,4,128,1,float16,fp8,0,0.08744532863299052
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,4,4,128,1,float16,float16,0,0.047151997685432434
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,4,4,128,1,float16,fp8,0,0.054245332876841225
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,4,1,128,1,float16,float16,0,0.03519999980926514
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,4,1,128,1,float16,fp8,0,0.03734400123357773
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,4,2,128,1,float16,float16,0,0.03886933376391729
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,4,2,128,1,float16,fp8,0,0.042170668641726174
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,4,4,128,1,float16,float16,0,0.04710400104522705
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,4,4,128,1,float16,fp8,0,0.054474666714668274
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,4,4,128,1,float16,float16,0,0.03882666677236557
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,4,4,128,1,float16,fp8,0,0.04380266865094503
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,4,1,128,1,float16,float16,0,0.033039999504884086
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,4,1,128,1,float16,fp8,0,0.03509866694609324
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,4,2,128,1,float16,float16,0,0.03482666611671448
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,4,2,128,1,float16,fp8,0,0.03859733293453852
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,4,4,128,1,float16,float16,0,0.03904533386230469
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,4,4,128,1,float16,fp8,0,0.04426133135954539
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,4,1,128,1,float16,float16,0,0.5680106480916342
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,4,1,128,1,float16,float16,0,0.29789332548777264
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,4,1,128,1,float16,fp8,0,0.3124693234761556
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,4,1,128,1,float16,fp8,0,0.5921386480331421
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,4,2,128,1,float16,float16,0,0.39025600751241046
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,4,2,128,1,float16,float16,0,0.7775519688924154
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,4,4,128,1,float16,float16,0,0.6938079992930094
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,4,2,128,1,float16,fp8,0,0.39079467455546063
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,4,4,128,1,float16,fp8,0,0.7720106442769369
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,4,2,128,1,float16,fp8,0,0.7562879721323649
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,4,4,128,1,float16,float16,0,0.6843199729919434
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,4,4,128,1,float16,float16,0,0.35579200585683185
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,4,4,128,1,float16,fp8,0,0.7529333432515463
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,4,4,128,1,float16,fp8,0,0.40385599931081134
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,4,1,128,1,float16,fp8,0,0.17053866386413574
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,4,1,128,1,float16,float16,0,0.16104533274968466
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,4,2,128,1,float16,float16,0,0.2248106598854065
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,4,4,128,1,float16,fp8,0,0.4038986762364705
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,4,4,128,1,float16,float16,0,0.35679467519124347
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,4,1,128,1,float16,float16,0,0.09107200304667155
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,4,4,128,1,float16,fp8,0,0.21919467051823935
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,4,1,128,1,float16,fp8,0,0.09251733620961507
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,4,2,128,1,float16,fp8,0,0.21022933721542358
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,4,4,128,1,float16,float16,0,0.18919465939203897
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,4,2,128,1,float16,float16,0,0.12029332915941875
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,4,2,128,1,float16,fp8,0,0.11833600203196208
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,4,4,128,1,float16,float16,0,0.18985066811243692
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,4,4,128,1,float16,fp8,0,0.21665600935618082
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,4,4,128,1,float16,float16,0,0.1039573351542155
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,4,4,128,1,float16,fp8,0,0.10904533664385478
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,4,1,128,1,float16,float16,0,0.05099200208981832
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,4,1,128,1,float16,fp8,0,0.054885332783063255
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,4,2,128,1,float16,float16,0,0.061530664563179016
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,4,2,128,1,float16,fp8,0,0.06417066852251689
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,4,4,128,1,float16,float16,0,0.10411733388900757
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,4,4,128,1,float16,fp8,0,0.11243733763694763
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,4,4,128,1,float16,float16,0,0.04658666749795278
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,4,4,128,1,float16,fp8,0,0.05469333132108053
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,4,1,128,1,float16,float16,0,0.030778666337331135
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,4,1,128,1,float16,fp8,0,0.03404266635576884
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,4,2,128,1,float16,float16,0,0.03565866748491923
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,4,2,128,1,float16,fp8,0,0.038880000511805214
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,4,4,128,1,float16,float16,0,0.04626133541266123
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,4,4,128,1,float16,fp8,0,0.05486933390299479
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,4,4,128,1,float16,float16,0,0.03530666728814443
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,4,4,128,1,float16,fp8,0,0.04125866790612539
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,4,1,128,1,float16,float16,0,0.02759466568628947
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,4,1,128,1,float16,fp8,0,0.0305173322558403
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,4,4,128,1,float16,float16,0,0.029765332738558452
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,4,2,128,1,float16,float16,0,0.029493334392706554
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,4,2,128,1,float16,fp8,0,0.03377600014209747
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,4,4,128,1,float16,float16,0,0.03537066777547201
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,4,4,128,1,float16,fp8,0,0.04115733255942663
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,4,4,128,1,float16,fp8,0,0.03429866582155228
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,4,1,128,1,float16,float16,0,0.02608533451954524
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,4,1,128,1,float16,fp8,0,0.02855466554562251
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,4,2,128,1,float16,float16,0,0.027002667387326557
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,4,2,128,1,float16,fp8,0,0.030266667405764263
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,4,4,128,1,float16,float16,0,0.029706666866938274
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,4,4,128,1,float16,fp8,0,0.034517332911491394
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,4,1,128,1,float16,float16,0,0.21498666206995645
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,4,1,128,1,float16,float16,0,0.40748266379038495
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,4,1,128,1,float16,fp8,0,0.4501066605250041
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,4,1,128,1,float16,fp8,0,0.2392639915148417
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,4,2,128,1,float16,float16,0,0.6187040011088053
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,4,4,128,1,float16,float16,0,0.5996373494466146
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,4,2,128,1,float16,float16,0,0.30728532870610553
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,4,4,128,1,float16,fp8,0,0.6972693602244059
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,4,2,128,1,float16,fp8,0,0.6207199891408285
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,4,2,128,1,float16,fp8,0,0.32386666536331177
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,4,4,128,1,float16,float16,0,0.6004000107447306
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,4,4,128,1,float16,fp8,0,0.368559996287028
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,4,4,128,1,float16,fp8,0,0.6898079713185629
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,4,1,128,1,float16,float16,0,0.11796800295511882
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,4,2,128,1,float16,float16,0,0.17819732427597046
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,4,4,128,1,float16,float16,0,0.31243733565012616
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,4,4,128,1,float16,float16,0,0.31167999903361004
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,4,2,128,1,float16,fp8,0,0.17602666219075522
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,4,4,128,1,float16,fp8,0,0.20167466004689535
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,4,1,128,1,float16,float16,0,0.06710933148860931
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,4,4,128,1,float16,fp8,0,0.3722826639811198
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,4,1,128,1,float16,fp8,0,0.13078932960828146
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,4,1,128,1,float16,fp8,0,0.07283199826876323
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,4,4,128,1,float16,float16,0,0.1678719917933146
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,4,2,128,1,float16,fp8,0,0.1002506713072459
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,4,4,128,1,float16,float16,0,0.16689600547154745
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,4,2,128,1,float16,float16,0,0.09910933176676433
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,4,4,128,1,float16,fp8,0,0.20055466890335083
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,4,4,128,1,float16,float16,0,0.0892693301041921
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,4,4,128,1,float16,fp8,0,0.0922933320204417
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,4,1,128,1,float16,float16,0,0.037274666130542755
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,4,1,128,1,float16,fp8,0,0.0428959975639979
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,4,2,128,1,float16,float16,0,0.04740266501903534
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,4,2,128,1,float16,fp8,0,0.056032001972198486
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,4,4,128,1,float16,float16,0,0.08935999870300293
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,4,4,128,1,float16,fp8,0,0.09557867050170898
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,4,4,128,1,float16,float16,0,0.039664000272750854
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,4,4,128,1,float16,fp8,0,0.04978133241335551
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,4,1,128,1,float16,float16,0,0.023738667368888855
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,4,1,128,1,float16,fp8,0,0.028853334486484528
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,4,2,128,1,float16,float16,0,0.028927999238173168
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,4,2,128,1,float16,fp8,0,0.035071998834609985
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,4,4,128,1,float16,float16,0,0.039621333281199135
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,4,4,128,1,float16,fp8,0,0.04995200037956238
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,4,4,128,1,float16,float16,0,0.0290133332212766
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,4,4,128,1,float16,fp8,0,0.036602665980656944
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,4,1,128,1,float16,float16,0,0.021231998999913532
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,4,1,128,1,float16,fp8,0,0.025424001117547352
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,4,2,128,1,float16,float16,0,0.023386667172114056
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,4,2,128,1,float16,fp8,0,0.029946667452653248
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,4,4,128,1,float16,float16,0,0.028864001234372456
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,4,4,128,1,float16,fp8,0,0.03674133370320002
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,4,4,128,1,float16,float16,0,0.023631999890009563
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,4,4,128,1,float16,fp8,0,0.029440000653266907
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,4,1,128,1,float16,float16,0,0.01964266722400983
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,4,1,128,1,float16,fp8,0,0.02380266785621643
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,4,2,128,1,float16,float16,0,0.020576000213623047
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,4,2,128,1,float16,fp8,0,0.026213333010673523
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,4,4,128,1,float16,float16,0,0.02348266790310542
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,4,4,128,1,float16,fp8,0,0.029839999973773956
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,4,4,128,1,float16,float16,0,0.02029866725206375
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,4,4,128,1,float16,fp8,0,0.025013332565625507
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,4,1,128,1,float16,float16,0,0.019226666539907455
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,4,1,128,1,float16,fp8,0,0.02242133269707362
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,4,2,128,1,float16,float16,0,0.019487999379634857
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,4,2,128,1,float16,fp8,0,0.02366400013367335
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,4,4,128,1,float16,float16,0,0.02051199972629547
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,4,4,128,1,float16,fp8,0,0.02531733363866806
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,4,1,128,1,float16,float16,0,0.18911999464035034
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,4,1,128,1,float16,fp8,0,0.21394666035970053
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,4,4,128,1,float16,float16,0,0.2984586755434672
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,4,2,128,1,float16,float16,0,0.28377066055933636
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,4,1,128,1,float16,float16,0,0.10321600238482158
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,4,1,128,1,float16,fp8,0,0.11930666367212932
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,4,4,128,1,float16,fp8,0,0.3503520091374715
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,4,2,128,1,float16,fp8,0,0.29850133260091144
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,4,4,128,1,float16,float16,0,0.29971200227737427
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,4,4,128,1,float16,float16,0,0.15957333644231161
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,4,4,128,1,float16,fp8,0,0.3496640125910441
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,4,2,128,1,float16,float16,0,0.1584160029888153
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,4,2,128,1,float16,fp8,0,0.16237866878509521
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,4,4,128,1,float16,fp8,0,0.18865066766738892
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,4,1,128,1,float16,float16,0,0.05973866581916809
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,4,1,128,1,float16,fp8,0,0.06400000055631001
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,4,2,128,1,float16,float16,0,0.09099200367927551
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,4,2,128,1,float16,fp8,0,0.09238933523495992
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,4,4,128,1,float16,float16,0,0.16049066185951233
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,4,4,128,1,float16,fp8,0,0.19107200702031454
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,4,4,128,1,float16,float16,0,0.08359466989835103
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,4,4,128,1,float16,fp8,0,0.08338133494059245
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,4,1,128,1,float16,float16,0,0.03160000095764796
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,4,1,128,1,float16,fp8,0,0.0374293327331543
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,4,2,128,1,float16,float16,0,0.042117332418759666
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,4,2,128,1,float16,fp8,0,0.04744000236193339
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,4,4,128,1,float16,float16,0,0.08433600266774495
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,4,4,128,1,float16,fp8,0,0.08417066931724548
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,4,4,128,1,float16,float16,0,0.03638399889071783
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,4,4,128,1,float16,fp8,0,0.046666666865348816
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,4,1,128,1,float16,float16,0,0.02111999938885371
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,4,1,128,1,float16,fp8,0,0.024832000335057575
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,4,2,128,1,float16,float16,0,0.026021334032217663
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,4,2,128,1,float16,fp8,0,0.031189332405726116
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,4,4,128,1,float16,float16,0,0.03643733263015747
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,4,4,128,1,float16,fp8,0,0.046911999583244324
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,4,4,128,1,float16,float16,0,0.02586666742960612
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,4,4,128,1,float16,fp8,0,0.03369066615899404
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,4,1,128,1,float16,float16,0,0.018378666291634243
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,4,1,128,1,float16,fp8,0,0.02141333371400833
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,4,2,128,1,float16,float16,0,0.020794666061798733
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,4,2,128,1,float16,fp8,0,0.02621866762638092
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,4,4,128,1,float16,float16,0,0.026165333886941273
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,4,4,128,1,float16,fp8,0,0.03306133300065994
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,4,4,128,1,float16,float16,0,0.0205226664741834
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,4,4,128,1,float16,fp8,0,0.026565333207448322
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,4,1,128,1,float16,float16,0,0.016821333517630894
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,4,4,128,1,float16,float16,0,0.020560000091791153
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,4,4,128,1,float16,fp8,0,0.026837334036827087
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,4,1,128,1,float16,fp8,0,0.020448000480731327
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,4,2,128,1,float16,float16,0,0.018079999834299088
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,4,2,128,1,float16,fp8,0,0.02312533309062322
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,4,4,128,1,float16,float16,0,0.01785600061217944
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,4,4,128,1,float16,fp8,0,0.021695998807748158
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,4,1,128,1,float16,float16,0,0.015573333948850632
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,4,1,128,1,float16,fp8,0,0.01836799954374631
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,4,2,128,1,float16,float16,0,0.01611199975013733
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,4,2,128,1,float16,fp8,0,0.019653332730134327
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,4,4,128,1,float16,float16,0,0.017488000293572743
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,4,4,128,1,float16,fp8,0,0.02161066730817159
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,4,4,128,1,float16,float16,0,0.015856000284353893
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,4,4,128,1,float16,fp8,0,0.0194560003777345
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,4,1,128,1,float16,float16,0,0.015439999600251516
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,4,1,128,1,float16,fp8,0,0.01817600056529045
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,4,2,128,1,float16,float16,0,0.01544533297419548
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,4,2,128,1,float16,fp8,0,0.019258666783571243
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,4,4,128,1,float16,float16,0,0.015925332903862
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,4,4,128,1,float16,fp8,0,0.019274666905403137
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,4,1,128,1,float16,float16,0,0.0995093286037445
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,4,1,128,1,float16,fp8,0,0.10974400242169698
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,4,4,128,1,float16,float16,0,0.15708800156911215
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,4,2,128,1,float16,float16,0,0.16080000003178915
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,4,4,128,1,float16,fp8,0,0.18070934216181436
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,4,2,128,1,float16,fp8,0,0.14774399995803833
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,4,1,128,1,float16,float16,0,0.057248001297314964
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,4,1,128,1,float16,fp8,0,0.05751466751098633
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,4,2,128,1,float16,float16,0,0.08798933029174805
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,4,4,128,1,float16,fp8,0,0.17686933279037476
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,4,2,128,1,float16,fp8,0,0.08319999774297078
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,4,4,128,1,float16,float16,0,0.15717333555221558
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,4,4,128,1,float16,float16,0,0.08189333478609721
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,4,4,128,1,float16,fp8,0,0.07926400005817413
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,4,1,128,1,float16,float16,0,0.029050665597120922
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,4,1,128,1,float16,fp8,0,0.032885332902272545
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,4,2,128,1,float16,float16,0,0.03856533269087473
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,4,2,128,1,float16,fp8,0,0.04166933397452036
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,4,4,128,1,float16,float16,0,0.08118933439254761
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,4,4,128,1,float16,fp8,0,0.07303466896216075
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,4,4,128,1,float16,float16,0,0.03488533447186152
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,4,4,128,1,float16,fp8,0,0.042208001017570496
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,4,1,128,1,float16,float16,0,0.01939733326435089
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,4,1,128,1,float16,fp8,0,0.022709332406520844
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,4,2,128,1,float16,float16,0,0.024271999796231587
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,4,2,128,1,float16,fp8,0,0.027653334041436512
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,4,4,128,1,float16,float16,0,0.03502399971087774
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,4,4,128,1,float16,fp8,0,0.04230933388074239
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,4,4,128,1,float16,float16,0,0.02454400062561035
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,4,4,128,1,float16,fp8,0,0.029189333319664
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,4,1,128,1,float16,float16,0,0.016869333883126576
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,4,1,128,1,float16,fp8,0,0.01947733387351036
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,4,2,128,1,float16,float16,0,0.01929066702723503
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,4,2,128,1,float16,fp8,0,0.02242133269707362
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,4,4,128,1,float16,float16,0,0.02443733314673106
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,4,4,128,1,float16,fp8,0,0.02922133356332779
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,4,4,128,1,float16,float16,0,0.019167999426523846
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,4,4,128,1,float16,fp8,0,0.022533332308133442
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,4,1,128,1,float16,float16,0,0.015370666980743408
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,4,1,128,1,float16,fp8,0,0.018122666825850803
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,4,2,128,1,float16,float16,0,0.01651200031240781
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,4,2,128,1,float16,fp8,0,0.019088000059127808
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,4,4,128,1,float16,float16,0,0.018863999595244724
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,4,4,128,1,float16,fp8,0,0.022426667312781017
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,4,4,128,1,float16,float16,0,0.01581866666674614
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,4,4,128,1,float16,fp8,0,0.018346666047970455
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,4,1,128,1,float16,float16,0,0.014352000008026758
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,4,1,128,1,float16,fp8,0,0.016458666572968166
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,4,2,128,1,float16,float16,0,0.014639999717473984
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,4,2,128,1,float16,fp8,0,0.017653333644072216
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,4,4,128,1,float16,float16,0,0.01553600033124288
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,4,4,128,1,float16,fp8,0,0.018570666511853535
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,4,4,128,1,float16,float16,0,0.014080000420411428
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,4,4,128,1,float16,fp8,0,0.016447999825080235
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,4,1,128,1,float16,float16,0,0.014074667046467463
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,4,1,128,1,float16,fp8,0,0.01590399940808614
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,4,2,128,1,float16,float16,0,0.013877333452304205
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,4,2,128,1,float16,fp8,0,0.016250666230916977
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,4,4,128,1,float16,float16,0,0.014122666170199713
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,4,4,128,1,float16,fp8,0,0.01651200031240781
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,4,4,128,1,float16,float16,0,0.013722666849692663
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,4,4,128,1,float16,fp8,0,0.015967999895413715
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,4,1,128,1,float16,float16,0,0.013877333452304205
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,4,1,128,1,float16,fp8,0,0.015610666324694952
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,4,2,128,1,float16,float16,0,0.013455999394257864
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,4,2,128,1,float16,fp8,0,0.016186666985352833
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,4,4,128,1,float16,float16,0,0.013658666362365087
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,4,4,128,1,float16,fp8,0,0.016058667252461117
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,4,1,128,1,float16,float16,0,0.05569600065549215
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,4,1,128,1,float16,fp8,0,0.055445333321889244
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,4,2,128,1,float16,float16,0,0.08702400326728821
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,4,4,128,1,float16,float16,0,0.0827466646830241
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,4,4,128,1,float16,fp8,0,0.07397333284219106
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,4,2,128,1,float16,fp8,0,0.08241599798202515
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,4,1,128,1,float16,float16,0,0.028805332879225414
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,4,1,128,1,float16,fp8,0,0.033402666449546814
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,4,2,128,1,float16,fp8,0,0.04196799794832865
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,4,2,128,1,float16,float16,0,0.03885866701602936
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,4,4,128,1,float16,float16,0,0.082805335521698
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,4,4,128,1,float16,float16,0,0.03608533243338267
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,4,4,128,1,float16,fp8,0,0.07355199754238129
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,4,4,128,1,float16,fp8,0,0.04180799921353658
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,4,1,128,1,float16,float16,0,0.01961600035429001
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,4,1,128,1,float16,fp8,0,0.0230880007147789
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,4,2,128,1,float16,float16,0,0.02439466615517934
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,4,2,128,1,float16,fp8,0,0.027535999814669292
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,4,4,128,1,float16,float16,0,0.036533333361148834
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,4,4,128,1,float16,float16,0,0.022757334013779957
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,4,4,128,1,float16,fp8,0,0.04165333261092504
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,4,4,128,1,float16,fp8,0,0.025621332228183746
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,4,1,128,1,float16,float16,0,0.016496000190575916
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,4,1,128,1,float16,fp8,0,0.01945066700379054
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,4,2,128,1,float16,float16,0,0.018965333700180054
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,4,2,128,1,float16,fp8,0,0.022330666581789654
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,4,4,128,1,float16,float16,0,0.02279466638962428
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,4,4,128,1,float16,fp8,0,0.025568000972270966
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,4,4,128,1,float16,float16,0,0.01758933315674464
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,4,4,128,1,float16,fp8,0,0.018874666343132656
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,4,1,128,1,float16,float16,0,0.015546667079130808
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,4,1,128,1,float16,fp8,0,0.017765333255132038
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,4,2,128,1,float16,float16,0,0.01617066686352094
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,4,2,128,1,float16,fp8,0,0.01912533367673556
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,4,4,128,1,float16,float16,0,0.017477333545684814
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,4,4,128,1,float16,fp8,0,0.018922666708628338
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,4,4,128,1,float16,float16,0,0.014650666465361914
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,4,4,128,1,float16,fp8,0,0.015317333241303762
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,4,1,128,1,float16,float16,0,0.01441066712141037
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,4,1,128,1,float16,fp8,0,0.01658133293191592
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,4,2,128,1,float16,float16,0,0.014394666999578476
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,4,2,128,1,float16,fp8,0,0.016501333564519882
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,4,4,128,1,float16,float16,0,0.014442666123310724
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,4,4,128,1,float16,fp8,0,0.015376000354687372
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,4,4,128,1,float16,float16,0,0.012826666235923767
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,4,2,128,1,float16,fp8,0,0.01605333387851715
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,4,4,128,1,float16,fp8,0,0.013264000415802002
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,4,1,128,1,float16,float16,0,0.01381333296497663
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,4,1,128,1,float16,fp8,0,0.016165333489576977
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,4,2,128,1,float16,float16,0,0.013951999445756277
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,4,4,128,1,float16,float16,0,0.01301866645614306
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,4,4,128,1,float16,fp8,0,0.013242666920026144
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,4,4,128,1,float16,float16,0,0.011973333855470022
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,4,4,128,1,float16,fp8,0,0.012421333541472753
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,4,1,128,1,float16,float16,0,0.013642666240533194
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,4,1,128,1,float16,fp8,0,0.015722667177518208
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,4,2,128,1,float16,float16,0,0.013280000537633896
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,4,2,128,1,float16,fp8,0,0.015487999965747198
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,4,4,128,1,float16,float16,0,0.012005332857370377
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,4,4,128,1,float16,fp8,0,0.012805332740147909
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,4,4,128,1,float16,float16,0,0.011658667276302973
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,4,4,128,1,float16,fp8,0,0.012223999947309494
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,4,1,128,1,float16,float16,0,0.013621332744757334
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,4,1,128,1,float16,fp8,0,0.01545599972208341
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,4,2,128,1,float16,float16,0,0.013343999783198038
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,4,2,128,1,float16,fp8,0,0.015482666591803232
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,4,4,128,1,float16,float16,0,0.011530666301647821
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,4,4,128,1,float16,fp8,0,0.012479999413092932
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,4,1,128,1,float16,float16,0,0.02867199977238973
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,4,1,128,1,float16,fp8,0,0.033200000723203026
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,4,4,128,1,float16,float16,0,0.04298666616280874
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,4,2,128,1,float16,float16,0,0.042837331692377724
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,4,4,128,1,float16,fp8,0,0.04790399968624115
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,4,2,128,1,float16,fp8,0,0.04060266663630804
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,4,1,128,1,float16,float16,0,0.019402666638294857
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,4,1,128,1,float16,fp8,0,0.023002666731675465
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,4,2,128,1,float16,float16,0,0.026133333643277485
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,4,2,128,1,float16,fp8,0,0.027242665489514668
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,4,4,128,1,float16,float16,0,0.04297066728274027
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,4,4,128,1,float16,float16,0,0.025936000049114227
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,4,4,128,1,float16,fp8,0,0.047925333182017006
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,4,4,128,1,float16,fp8,0,0.028933333853880566
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,4,1,128,1,float16,float16,0,0.016613333175579708
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,4,1,128,1,float16,fp8,0,0.019461333751678467
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,4,2,128,1,float16,float16,0,0.017632000148296356
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,4,2,128,1,float16,fp8,0,0.018911999960740406
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,4,4,128,1,float16,float16,0,0.02619733413060506
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,4,4,128,1,float16,fp8,0,0.029135999580224354
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,4,4,128,1,float16,float16,0,0.017701332767804463
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,4,4,128,1,float16,fp8,0,0.018976000448067982
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,4,1,128,1,float16,float16,0,0.01533866673707962
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,4,1,128,1,float16,fp8,0,0.018272000054518383
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,4,2,128,1,float16,float16,0,0.014549333602190018
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,4,2,128,1,float16,fp8,0,0.01533866673707962
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,4,4,128,1,float16,float16,0,0.01764800027012825
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,4,4,128,1,float16,fp8,0,0.018901333212852478
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,4,4,128,1,float16,float16,0,0.014629332969586054
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,4,4,128,1,float16,fp8,0,0.015423999478419622
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,4,1,128,1,float16,float16,0,0.01422400027513504
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,4,1,128,1,float16,fp8,0,0.016415999581416447
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,4,2,128,1,float16,float16,0,0.013088000317414602
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,4,2,128,1,float16,fp8,0,0.01369599997997284
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,4,4,128,1,float16,float16,0,0.014362666755914688
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,4,4,128,1,float16,fp8,0,0.015439999600251516
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,4,4,128,1,float16,float16,0,0.013301332791646322
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,4,4,128,1,float16,fp8,0,0.014015999933083853
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,4,2,128,1,float16,fp8,0,0.013183999806642532
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,4,1,128,1,float16,float16,0,0.013733333597580591
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,4,1,128,1,float16,fp8,0,0.01595199977358182
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,4,2,128,1,float16,float16,0,0.012698666503032049
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,4,4,128,1,float16,float16,0,0.012960000584522883
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,4,4,128,1,float16,fp8,0,0.013749333719412485
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,4,4,128,1,float16,float16,0,0.012373333175977072
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,4,4,128,1,float16,fp8,0,0.013130666067202887
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,4,1,128,1,float16,float16,0,0.013701333353916803
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,4,1,128,1,float16,fp8,0,0.015850666910409927
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,4,2,128,1,float16,float16,0,0.011877333124478659
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,4,2,128,1,float16,fp8,0,0.012746666868527731
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,4,4,128,1,float16,float16,0,0.012437333663304647
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,4,4,128,1,float16,fp8,0,0.012991999586423239
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,4,4,128,1,float16,float16,0,0.011829332758982977
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,4,4,128,1,float16,fp8,0,0.01249066616098086
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,4,1,128,1,float16,float16,0,0.01328533391157786
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,4,1,128,1,float16,fp8,0,0.01568000018596649
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,4,2,128,1,float16,float16,0,0.011519999553759893
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,4,2,128,1,float16,fp8,0,0.012026666353146235
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,4,4,128,1,float16,float16,0,0.01179733375708262
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,4,4,128,1,float16,fp8,0,0.012266666938861212
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,4,4,128,1,float16,float16,0,0.01184533288081487
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,4,2,128,1,float16,float16,0,0.011237333218256632
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,4,4,128,1,float16,fp8,0,0.011861333002646765
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,4,1,128,1,float16,float16,0,0.013290667285521826
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,4,1,128,1,float16,fp8,0,0.01565333331624667
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,4,2,128,1,float16,fp8,0,0.012037333101034164
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,4,4,128,1,float16,float16,0,0.011429333438475927
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,4,4,128,1,float16,fp8,0,0.012378666549921036
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,4,1,128,1,float16,float16,0,0.021381333470344543
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,4,1,128,1,float16,fp8,0,0.0223786657055219
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,4,4,128,1,float16,float16,0,0.032842665910720825
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,4,2,128,1,float16,fp8,0,0.03367999941110611
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,4,2,128,1,float16,float16,0,0.032746667663256325
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,4,4,128,1,float16,fp8,0,0.03531199942032496
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,4,1,128,1,float16,float16,0,0.01525866612792015
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,4,1,128,1,float16,fp8,0,0.01613333324591319
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,4,2,128,1,float16,float16,0,0.020799999435742695
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,4,2,128,1,float16,fp8,0,0.022367998957633972
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,4,4,128,1,float16,float16,0,0.032842665910720825
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,4,4,128,1,float16,fp8,0,0.03544000039498011
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,4,1,128,1,float16,fp8,0,0.01402666668097178
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,4,4,128,1,float16,float16,0,0.021151999632517498
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,4,4,128,1,float16,fp8,0,0.022511998812357586
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,4,1,128,1,float16,float16,0,0.01349866638580958
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,4,2,128,1,float16,float16,0,0.014874666929244995
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,4,2,128,1,float16,fp8,0,0.015872000406185787
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,4,4,128,1,float16,float16,0,0.021301334102948506
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,4,4,128,1,float16,fp8,0,0.022704000274340313
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,4,4,128,1,float16,float16,0,0.014906667172908783
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,4,4,128,1,float16,fp8,0,0.016000000139077503
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,4,1,128,1,float16,float16,0,0.012757333616415659
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,4,1,128,1,float16,fp8,0,0.013514666507641474
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,4,2,128,1,float16,float16,0,0.013232000172138214
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,4,2,128,1,float16,fp8,0,0.013797332843144735
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,4,4,128,1,float16,float16,0,0.014783999572197596
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,4,4,128,1,float16,fp8,0,0.015749332805474598
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,4,4,128,1,float16,float16,0,0.013194666554530462
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,4,4,128,1,float16,fp8,0,0.013610667238632837
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,4,1,128,1,float16,float16,0,0.012149333953857422
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,4,4,128,1,float16,fp8,0,0.013872000078360239
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,4,1,128,1,float16,fp8,0,0.01268799975514412
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,4,2,128,1,float16,float16,0,0.013050666699806849
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,4,2,128,1,float16,fp8,0,0.013733333597580591
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,4,4,128,1,float16,float16,0,0.013274667163689932
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,4,4,128,1,float16,float16,0,0.012768000364303589
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,4,4,128,1,float16,fp8,0,0.013381333400805792
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,4,4,128,1,float16,float16,0,0.012869333227475485
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,4,1,128,1,float16,float16,0,0.01191466674208641
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,4,1,128,1,float16,fp8,0,0.0124746672809124
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,4,2,128,1,float16,float16,0,0.01221866657336553
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,4,2,128,1,float16,fp8,0,0.012821332861979803
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,4,2,128,1,float16,float16,0,0.011749333391586939
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,4,4,128,1,float16,fp8,0,0.013359999905029932
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,4,4,128,1,float16,float16,0,0.012266666938861212
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,4,4,128,1,float16,fp8,0,0.013125333935022354
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,4,1,128,1,float16,float16,0,0.011413333316644033
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,4,1,128,1,float16,fp8,0,0.012250666817029318
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,4,2,128,1,float16,fp8,0,0.011994666109482447
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,4,4,128,1,float16,float16,0,0.01221866657336553
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,4,4,128,1,float16,fp8,0,0.012560000022252401
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,4,4,128,1,float16,float16,0,0.011823999385039011
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,4,4,128,1,float16,fp8,0,0.012416000167528788
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,4,1,128,1,float16,float16,0,0.011338666081428528
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,4,1,128,1,float16,fp8,0,0.01192533348997434
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,4,2,128,1,float16,float16,0,0.011359999577204386
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,4,2,128,1,float16,fp8,0,0.011946666985750198
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,4,4,128,1,float16,float16,0,0.011701333026091257
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,4,4,128,1,float16,fp8,0,0.012234666695197424
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,4,4,128,1,float16,float16,0,0.011429333438475927
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,4,4,128,1,float16,fp8,0,0.012319999436537424
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,4,1,128,1,float16,float16,0,0.01126933346192042
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,4,1,128,1,float16,fp8,0,0.01190399999419848
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,4,2,128,1,float16,float16,0,0.011301333705584208
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,4,2,128,1,float16,fp8,0,0.01179733375708262
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,4,4,128,1,float16,float16,0,0.011296000331640244
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,4,4,128,1,float16,fp8,0,0.012047999848922094
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,2,1,128,1,float16,float16,0,1.9296693801879883
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,2,2,128,1,float16,float16,0,1.147642691930135
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16384,2,1,128,1,float16,fp8,0,1.9918932914733887
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,2,2,128,1,float16,fp8,0,1.1490240097045898
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,2,1,128,1,float16,float16,0,1.035200039545695
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,2,1,128,1,float16,fp8,0,1.0602773030598958
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,2,2,128,1,float16,float16,0,1.1463306744893391
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,2,2,128,1,float16,float16,0,0.6537013451258341
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,2,2,128,1,float16,fp8,0,1.1502933502197266
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,2,2,128,1,float16,fp8,0,0.6562560002009074
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,2,1,128,1,float16,float16,0,0.5692960023880005
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,2,1,128,1,float16,fp8,0,0.5913066864013672
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,2,2,128,1,float16,float16,0,0.6605120102564493
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,2,2,128,1,float16,float16,0,0.391759991645813
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,2,2,128,1,float16,fp8,0,0.6503520011901855
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,2,2,128,1,float16,fp8,0,0.39986666043599445
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,2,1,128,1,float16,float16,0,0.33396267890930176
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,2,1,128,1,float16,fp8,0,0.34969067573547363
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,2,2,128,1,float16,float16,0,0.3946400086085002
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,2,2,128,1,float16,fp8,0,0.40300798416137695
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,2,1,128,1,float16,float16,0,1.1711413065592449
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,2,2,128,1,float16,float16,0,0.7154719829559326
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,12288,2,1,128,1,float16,fp8,0,1.199669361114502
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,2,2,128,1,float16,fp8,0,0.7233066558837891
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,2,1,128,1,float16,float16,0,0.6366186539332072
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,2,2,128,1,float16,fp8,0,0.7173706690470377
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,2,1,128,1,float16,fp8,0,0.6552639802296957
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,2,2,128,1,float16,float16,0,0.7275839646657308
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,2,1,128,1,float16,fp8,0,0.37482134501139325
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,2,2,128,1,float16,float16,0,0.4124266703923543
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,2,2,128,1,float16,fp8,0,0.4182826677958171
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,2,2,128,1,float16,float16,0,0.2466986576716105
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,2,1,128,1,float16,float16,0,0.36456533273061115
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,2,2,128,1,float16,float16,0,0.41257067521413165
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,2,2,128,1,float16,fp8,0,0.4208960135777791
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,2,2,128,1,float16,fp8,0,0.2525973320007324
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,2,1,128,1,float16,float16,0,0.227728009223938
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,2,1,128,1,float16,fp8,0,0.24281599124272665
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,2,2,128,1,float16,float16,0,0.2446026603380839
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,2,2,128,1,float16,fp8,0,0.2572266658147176
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,2,1,128,1,float16,float16,0,0.8516480127970377
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,10240,2,1,128,1,float16,fp8,0,0.8738453388214111
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,2,2,128,1,float16,float16,0,0.5528053442637125
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,2,2,128,1,float16,fp8,0,0.5418879985809326
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,2,1,128,1,float16,float16,0,0.47062933444976807
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,2,1,128,1,float16,fp8,0,0.48949865500132245
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,2,2,128,1,float16,float16,0,0.5415786504745483
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,2,2,128,1,float16,fp8,0,0.5461386839548746
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,2,2,128,1,float16,float16,0,0.3237653374671936
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,2,2,128,1,float16,fp8,0,0.32174400488535565
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,2,1,128,1,float16,float16,0,0.2712053259213765
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,2,1,128,1,float16,fp8,0,0.2834399938583374
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,2,2,128,1,float16,float16,0,0.3245919942855835
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,2,2,128,1,float16,fp8,0,0.3249280055363973
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,2,2,128,1,float16,float16,0,0.18797866503397623
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,2,2,128,1,float16,fp8,0,0.19860267639160156
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,2,2,128,1,float16,fp8,0,0.1978986660639445
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,2,1,128,1,float16,float16,0,0.1750613252321879
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,2,1,128,1,float16,fp8,0,0.18523200352986655
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,2,1,128,1,float16,fp8,0,1.1303306420644124
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,2,2,128,1,float16,float16,0,0.1894879937171936
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,8192,2,1,128,1,float16,float16,0,1.1063306331634521
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,2,2,128,1,float16,float16,0,0.6810932954152426
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,2,2,128,1,float16,fp8,0,0.6921866734822592
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,2,1,128,1,float16,float16,0,0.5838773250579834
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,2,1,128,1,float16,fp8,0,0.6086399952570597
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,2,2,128,1,float16,float16,0,0.6848266919453939
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,2,2,128,1,float16,fp8,0,0.6911626656850179
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,2,2,128,1,float16,float16,0,0.4028000036875407
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,2,2,128,1,float16,fp8,0,0.39005335172017414
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,2,1,128,1,float16,float16,0,0.32676267623901367
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,2,1,128,1,float16,fp8,0,0.34694401423136395
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,2,2,128,1,float16,float16,0,0.3993706703186035
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,2,2,128,1,float16,fp8,0,0.3924320141474406
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,2,2,128,1,float16,float16,0,0.235642671585083
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,2,2,128,1,float16,fp8,0,0.23810666799545288
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,2,1,128,1,float16,float16,0,0.19760000705718994
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,2,1,128,1,float16,fp8,0,0.20898133516311646
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,2,2,128,1,float16,float16,0,0.23632532358169556
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,2,2,128,1,float16,fp8,0,0.23755200703938803
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,2,2,128,1,float16,float16,0,0.1323040028413137
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,2,2,128,1,float16,fp8,0,0.1418400009473165
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,2,1,128,1,float16,float16,0,0.11913067102432251
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,2,1,128,1,float16,fp8,0,0.13238400220870972
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,2,2,128,1,float16,float16,0,0.13186132907867432
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,2,2,128,1,float16,fp8,0,0.14124799768129984
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,2,1,128,1,float16,float16,0,0.7077866395314535
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,6144,2,1,128,1,float16,fp8,0,0.6679840087890625
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,2,2,128,1,float16,float16,0,0.46647465229034424
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,2,2,128,1,float16,fp8,0,0.4281333287556966
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,2,1,128,1,float16,float16,0,0.38185067971547443
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,2,1,128,1,float16,fp8,0,0.3652373154958089
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,2,2,128,1,float16,float16,0,0.47839999198913574
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,2,2,128,1,float16,fp8,0,0.4261173407236735
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,2,2,128,1,float16,float16,0,0.26917866865793866
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,2,2,128,1,float16,fp8,0,0.2456586758295695
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,2,1,128,1,float16,float16,0,0.22442134221394858
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,2,1,128,1,float16,fp8,0,0.20892266432444254
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,2,2,128,1,float16,float16,0,0.2675466736157735
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,2,2,128,1,float16,fp8,0,0.24542399247487387
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,2,2,128,1,float16,float16,0,0.1534933348496755
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,2,2,128,1,float16,fp8,0,0.14909332990646362
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,2,1,128,1,float16,float16,0,0.133242666721344
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,2,1,128,1,float16,fp8,0,0.1295413374900818
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,2,2,128,1,float16,float16,0,0.15467733144760132
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,2,2,128,1,float16,fp8,0,0.14340800046920776
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,2,2,128,1,float16,float16,0,0.103301336367925
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,2,2,128,1,float16,fp8,0,0.10020800431569417
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,2,1,128,1,float16,float16,0,0.09518399834632874
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,2,1,128,1,float16,fp8,0,0.09251733620961507
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,2,2,128,1,float16,float16,0,0.10374400019645691
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,2,2,128,1,float16,fp8,0,0.1013813316822052
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,2,1,128,1,float16,float16,0,0.6941813627878824
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,2,2,128,1,float16,float16,0,0.4711466630299886
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,4096,2,1,128,1,float16,fp8,0,0.6709919770558676
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,2,2,128,1,float16,fp8,0,0.43942399819691974
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,2,1,128,1,float16,float16,0,0.3729226589202881
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,2,1,128,1,float16,fp8,0,0.3624533414840698
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,2,2,128,1,float16,float16,0,0.47545599937438965
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,2,2,128,1,float16,float16,0,0.28041066726048786
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,2,2,128,1,float16,fp8,0,0.43886399269104004
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,2,2,128,1,float16,fp8,0,0.25034666061401367
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,2,1,128,1,float16,float16,0,0.210533340771993
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,2,1,128,1,float16,fp8,0,0.20553600788116455
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,2,2,128,1,float16,float16,0,0.2698240081469218
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,2,1,128,1,float16,float16,0,0.12191999951998393
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,2,1,128,1,float16,fp8,0,0.12077333529790242
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,2,2,128,1,float16,fp8,0,0.24949334065119425
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,2,2,128,1,float16,float16,0,0.16056533654530844
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,2,2,128,1,float16,fp8,0,0.13900267084439596
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,2,2,128,1,float16,float16,0,0.16171733538309732
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,2,2,128,1,float16,fp8,0,0.1418880025545756
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,2,2,128,1,float16,float16,0,0.08358400066693623
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,2,2,128,1,float16,float16,0,0.08438400427500407
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,2,2,128,1,float16,fp8,0,0.08338666955629985
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,2,1,128,1,float16,float16,0,0.0726453314224879
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,2,1,128,1,float16,fp8,0,0.07341866691907246
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,2,2,128,1,float16,fp8,0,0.08237333099047343
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,2,2,128,1,float16,float16,0,0.0728000005086263
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,2,2,128,1,float16,fp8,0,0.07369066774845123
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,2,1,128,1,float16,float16,0,0.06703466673692067
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,2,2,128,1,float16,fp8,0,0.07374399900436401
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,2,1,128,1,float16,fp8,0,0.06768533090750377
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,2,2,128,1,float16,float16,0,0.0732586681842804
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,2,2,128,1,float16,float16,0,0.3295946717262268
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,2,2,128,1,float16,fp8,0,0.30615466833114624
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,2,1,128,1,float16,float16,0,0.24665600061416626
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,2,1,128,1,float16,float16,0,0.45054399967193604
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,3072,2,1,128,1,float16,fp8,0,0.4430239995320638
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,2,1,128,1,float16,fp8,0,0.24491733312606812
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,2,2,128,1,float16,float16,0,0.3244160016377767
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,2,2,128,1,float16,fp8,0,0.3073280056317647
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,2,2,128,1,float16,float16,0,0.18681599696477255
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,2,2,128,1,float16,fp8,0,0.17461333672205606
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,2,1,128,1,float16,float16,0,0.14060800274213156
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,2,1,128,1,float16,fp8,0,0.13757333159446716
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,2,2,128,1,float16,float16,0,0.10035199920336406
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,2,2,128,1,float16,fp8,0,0.1746079921722412
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,2,2,128,1,float16,float16,0,0.18570133050282797
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,2,2,128,1,float16,fp8,0,0.09855467081069946
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,2,1,128,1,float16,float16,0,0.08283733328183492
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,2,1,128,1,float16,fp8,0,0.08698667089144389
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,2,2,128,1,float16,float16,0,0.10038933157920837
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,2,1,128,1,float16,fp8,0,0.06044800082842509
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,2,2,128,1,float16,fp8,0,0.09962667028109233
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,2,2,128,1,float16,float16,0,0.065610667069753
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,2,2,128,1,float16,fp8,0,0.0670666644970576
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,2,1,128,1,float16,float16,0,0.057130664587020874
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,2,2,128,1,float16,float16,0,0.06544533371925354
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,2,2,128,1,float16,fp8,0,0.0670666644970576
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,2,2,128,1,float16,float16,0,0.05765333275000254
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,2,2,128,1,float16,fp8,0,0.06015466650327047
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,2,1,128,1,float16,float16,0,0.053717335065205894
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,2,1,128,1,float16,fp8,0,0.0557226687669754
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,2,2,128,1,float16,float16,0,0.05784533421198527
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,2,2,128,1,float16,fp8,0,0.05895466605822245
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,2,1,128,1,float16,float16,0,0.4744746685028076
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,2,2,128,1,float16,float16,0,0.3637919823328654
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,2048,2,1,128,1,float16,fp8,0,0.4814346631368001
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,2,2,128,1,float16,fp8,0,0.3380800088246663
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,2,1,128,1,float16,float16,0,0.2555306752522786
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,2,1,128,1,float16,fp8,0,0.2587839961051941
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,2,2,128,1,float16,float16,0,0.35286935170491535
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,2,1,128,1,float16,fp8,0,0.1423413356145223
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,2,2,128,1,float16,fp8,0,0.3354293505350749
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,2,2,128,1,float16,float16,0,0.2108746568361918
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,2,2,128,1,float16,fp8,0,0.18728532393773398
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,2,1,128,1,float16,float16,0,0.14230933785438538
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,2,2,128,1,float16,float16,0,0.20898133516311646
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,2,2,128,1,float16,fp8,0,0.18774400154749551
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,2,2,128,1,float16,fp8,0,0.10428266723950703
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,2,2,128,1,float16,float16,0,0.1188213328520457
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,2,1,128,1,float16,float16,0,0.0811359981695811
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,2,1,128,1,float16,fp8,0,0.08298133313655853
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,2,2,128,1,float16,float16,0,0.11909866333007812
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,2,2,128,1,float16,fp8,0,0.09941333532333374
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,2,2,128,1,float16,float16,0,0.05885333319505056
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,2,2,128,1,float16,fp8,0,0.058277333776156105
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,2,1,128,1,float16,float16,0,0.04756266872088114
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,2,1,128,1,float16,fp8,0,0.048767998814582825
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,2,2,128,1,float16,fp8,0,0.049216002225875854
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,2,2,128,1,float16,float16,0,0.05818133552869161
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,2,2,128,1,float16,fp8,0,0.0580213318268458
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,2,2,128,1,float16,float16,0,0.0476800004641215
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,2,1,128,1,float16,float16,0,0.042591998974482216
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,2,1,128,1,float16,fp8,0,0.0447573314110438
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,2,2,128,1,float16,float16,0,0.047370667258898415
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,2,2,128,1,float16,fp8,0,0.04929066697756449
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,2,2,128,1,float16,float16,0,0.04234133164087931
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,2,2,128,1,float16,fp8,0,0.0440533310174942
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,2,1,128,1,float16,float16,0,0.03974399964014689
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,2,1,128,1,float16,fp8,0,0.0403466671705246
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,2,2,128,1,float16,float16,0,0.04241600135962168
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,2,2,128,1,float16,fp8,0,0.044122666120529175
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,2,1,128,1,float16,float16,0,0.32477867603302
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1536,2,1,128,1,float16,fp8,0,0.3344693183898926
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,2,2,128,1,float16,float16,0,0.259061336517334
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,2,1,128,1,float16,float16,0,0.17618666092554727
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,2,2,128,1,float16,fp8,0,0.244704008102417
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,2,1,128,1,float16,fp8,0,0.18357332547505698
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,2,2,128,1,float16,float16,0,0.2589706579844157
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,2,2,128,1,float16,fp8,0,0.2443093260129293
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,2,2,128,1,float16,float16,0,0.1477120021979014
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,2,2,128,1,float16,fp8,0,0.13832533359527588
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,2,1,128,1,float16,float16,0,0.1002506713072459
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,2,1,128,1,float16,fp8,0,0.10147733489672343
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,2,2,128,1,float16,float16,0,0.14737600088119507
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,2,2,128,1,float16,fp8,0,0.13780799508094788
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,2,2,128,1,float16,float16,0,0.08146666487058003
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,2,2,128,1,float16,fp8,0,0.07495999832948048
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,2,1,128,1,float16,float16,0,0.05693866809209188
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,2,1,128,1,float16,fp8,0,0.06324266890684764
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,2,2,128,1,float16,float16,0,0.07758933305740356
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,2,2,128,1,float16,fp8,0,0.07357333103815715
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,2,2,128,1,float16,float16,0,0.046021332343419395
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,2,2,128,1,float16,fp8,0,0.0498879998922348
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,2,1,128,1,float16,float16,0,0.03832533210515976
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,2,1,128,1,float16,fp8,0,0.0420959989229838
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,2,2,128,1,float16,float16,0,0.046351999044418335
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,2,2,128,1,float16,fp8,0,0.04971200227737427
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,2,2,128,1,float16,float16,0,0.03868799904982249
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,2,1,128,1,float16,float16,0,0.03446399917205175
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,2,2,128,1,float16,fp8,0,0.042853335539499916
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,2,1,128,1,float16,fp8,0,0.038191998998324074
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,2,2,128,1,float16,float16,0,0.038592000802357994
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,2,2,128,1,float16,fp8,0,0.042490666111310325
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,2,2,128,1,float16,float16,0,0.03465600063403448
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,2,2,128,1,float16,fp8,0,0.038586666186650596
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,2,1,128,1,float16,float16,0,0.03317866722742716
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,2,1,128,1,float16,fp8,0,0.035887998839219414
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,2,2,128,1,float16,float16,0,0.03469333300987879
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,2,2,128,1,float16,fp8,0,0.03875199953715006
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,2,1,128,1,float16,float16,0,0.3666880130767822
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,2,2,128,1,float16,float16,0,0.30350399017333984
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,2,2,128,1,float16,fp8,0,0.29201600948969525
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,2,1,128,1,float16,fp8,0,0.20748800039291382
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,2,1,128,1,float16,float16,0,0.1957706610361735
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,2,2,128,1,float16,float16,0,0.3071413238843282
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,2,2,128,1,float16,fp8,0,0.29073599974314374
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,1024,2,1,128,1,float16,fp8,0,0.39105065663655597
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,2,2,128,1,float16,float16,0,0.1770346760749817
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,2,2,128,1,float16,fp8,0,0.161189337571462
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,2,1,128,1,float16,float16,0,0.1086293359597524
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,2,1,128,1,float16,fp8,0,0.11686399579048157
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,2,2,128,1,float16,float16,0,0.17684266964594522
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,2,2,128,1,float16,fp8,0,0.16064000129699707
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,2,2,128,1,float16,float16,0,0.09959466258684795
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,2,1,128,1,float16,float16,0,0.05748266478379568
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,2,2,128,1,float16,fp8,0,0.08541333675384521
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,2,1,128,1,float16,fp8,0,0.0643146683772405
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,2,2,128,1,float16,float16,0,0.09734400113423665
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,2,2,128,1,float16,fp8,0,0.08564800024032593
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,2,2,128,1,float16,float16,0,0.045509333411852516
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,2,2,128,1,float16,float16,0,0.04548266530036926
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,2,2,128,1,float16,fp8,0,0.04878933231035868
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,2,2,128,1,float16,fp8,0,0.03939199944337209
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,2,1,128,1,float16,float16,0,0.03478399912516276
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,2,1,128,1,float16,fp8,0,0.03826666623353958
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,2,2,128,1,float16,fp8,0,0.04877333343029022
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,2,2,128,1,float16,float16,0,0.03486400097608566
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,2,2,128,1,float16,float16,0,0.029690665503342945
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,2,1,128,1,float16,float16,0,0.029824001093705494
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,2,1,128,1,float16,fp8,0,0.03345066557327906
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,2,2,128,1,float16,float16,0,0.034927998979886375
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,2,2,128,1,float16,fp8,0,0.0391893337170283
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,2,2,128,1,float16,fp8,0,0.034202667574087776
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,2,1,128,1,float16,float16,0,0.026954665780067444
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,2,2,128,1,float16,float16,0,0.0296426663796107
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,2,1,128,1,float16,fp8,0,0.03009066730737686
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,2,2,128,1,float16,fp8,0,0.03421866645415624
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,2,2,128,1,float16,float16,0,0.026416001220544178
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,2,2,128,1,float16,fp8,0,0.030581332743167877
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,2,2,128,1,float16,fp8,0,0.030917334059874218
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,2,1,128,1,float16,float16,0,0.025237334271272022
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,2,1,128,1,float16,fp8,0,0.027935999135176342
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,2,2,128,1,float16,float16,0,0.026517334083716076
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,2,2,128,1,float16,float16,0,0.2644053300221761
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,2,2,128,1,float16,fp8,0,0.25933865706125897
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,2,1,128,1,float16,float16,0,0.2832159996032715
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,2,1,128,1,float16,float16,0,0.149536003669103
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,512,2,1,128,1,float16,fp8,0,0.32096532980600995
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,2,1,128,1,float16,fp8,0,0.17341333627700806
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,2,2,128,1,float16,float16,0,0.2651306589444478
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,2,2,128,1,float16,fp8,0,0.25733866294225055
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,2,2,128,1,float16,float16,0,0.15294399857521057
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,2,2,128,1,float16,fp8,0,0.14363732933998108
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,2,1,128,1,float16,float16,0,0.08474133412043254
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,2,2,128,1,float16,float16,0,0.15253866712252298
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,2,1,128,1,float16,fp8,0,0.09821333487828572
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,2,2,128,1,float16,fp8,0,0.14271466930707297
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,2,2,128,1,float16,fp8,0,0.0709440012772878
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,2,1,128,1,float16,float16,0,0.04378666480382284
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,2,1,128,1,float16,fp8,0,0.053599998354911804
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,2,2,128,1,float16,float16,0,0.08356799681981404
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,2,2,128,1,float16,float16,0,0.08373333017031352
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,2,2,128,1,float16,fp8,0,0.07771199941635132
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,2,2,128,1,float16,float16,0,0.038634667793909706
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,2,2,128,1,float16,fp8,0,0.043509334325790405
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,2,1,128,1,float16,float16,0,0.02755733331044515
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,2,1,128,1,float16,fp8,0,0.03398933261632919
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,2,2,128,1,float16,float16,0,0.038592000802357994
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,2,2,128,1,float16,fp8,0,0.043381333351135254
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,2,2,128,1,float16,float16,0,0.02842666705449422
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,2,2,128,1,float16,float16,0,0.028064000109831493
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,2,2,128,1,float16,fp8,0,0.03472000112136205
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,2,1,128,1,float16,float16,0,0.023285334308942158
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,2,1,128,1,float16,fp8,0,0.029930666089057922
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,2,2,128,1,float16,fp8,0,0.035002666215101876
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,2,2,128,1,float16,float16,0,0.023562667270501454
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,2,2,128,1,float16,fp8,0,0.0295413335164388
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,2,1,128,1,float16,float16,0,0.020202666521072388
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,2,1,128,1,float16,fp8,0,0.026341333985328674
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,2,2,128,1,float16,float16,0,0.023370665808518726
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,2,2,128,1,float16,fp8,0,0.029802667597929638
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,2,2,128,1,float16,float16,0,0.020479999482631683
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,2,2,128,1,float16,fp8,0,0.025397333006064098
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,2,1,128,1,float16,float16,0,0.019258666783571243
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,2,1,128,1,float16,fp8,0,0.0235359991590182
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,2,2,128,1,float16,float16,0,0.02054399996995926
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,2,2,128,1,float16,fp8,0,0.025781333446502686
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,2,2,128,1,float16,float16,0,0.01884799947341283
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,2,2,128,1,float16,fp8,0,0.023605334262053173
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,2,1,128,1,float16,float16,0,0.01863466699918111
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,2,1,128,1,float16,fp8,0,0.02347733328739802
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,2,2,128,1,float16,float16,0,0.018613333503405254
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,2,2,128,1,float16,fp8,0,0.02347733328739802
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,2,1,128,1,float16,float16,0,0.13727466265360513
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,2,2,128,1,float16,float16,0,0.14611732959747314
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,2,2,128,1,float16,fp8,0,0.1330560048421224
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,2,1,128,1,float16,float16,0,0.07720533510049184
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,256,2,1,128,1,float16,fp8,0,0.15974400440851846
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,2,1,128,1,float16,fp8,0,0.08969066540400188
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,2,2,128,1,float16,fp8,0,0.13244799772898355
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,2,2,128,1,float16,float16,0,0.14454399545987448
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,2,2,128,1,float16,float16,0,0.07720533510049184
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,2,2,128,1,float16,fp8,0,0.07063466807206471
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,2,1,128,1,float16,float16,0,0.037952000896135964
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,2,1,128,1,float16,fp8,0,0.0469706654548645
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,2,2,128,1,float16,float16,0,0.07694399853547414
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,2,2,128,1,float16,fp8,0,0.07134933272997539
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,2,2,128,1,float16,float16,0,0.0356480007370313
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,2,2,128,1,float16,fp8,0,0.0399893323580424
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,2,1,128,1,float16,float16,0,0.024703999360402424
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,2,1,128,1,float16,fp8,0,0.03035199890534083
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,2,2,128,1,float16,float16,0,0.03565333286921183
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,2,2,128,1,float16,fp8,0,0.03994666785001755
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,2,2,128,1,float16,float16,0,0.025573333104451496
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,2,2,128,1,float16,fp8,0,0.03178666780392329
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,2,1,128,1,float16,float16,0,0.020597333709398907
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,2,1,128,1,float16,fp8,0,0.026394667724768322
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,2,2,128,1,float16,float16,0,0.025487999121348064
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,2,2,128,1,float16,fp8,0,0.03156800071398417
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,2,2,128,1,float16,float16,0,0.020703999946514767
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,2,2,128,1,float16,fp8,0,0.026533332963784535
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,2,1,128,1,float16,float16,0,0.0182239996890227
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,2,1,128,1,float16,fp8,0,0.02252800017595291
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,2,2,128,1,float16,float16,0,0.02054399996995926
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,2,2,128,1,float16,fp8,0,0.02651199946800868
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,2,2,128,1,float16,float16,0,0.01724799970785777
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,2,2,128,1,float16,fp8,0,0.022240000466505688
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,2,1,128,1,float16,float16,0,0.01618133361140887
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,2,1,128,1,float16,fp8,0,0.019760000209013622
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,2,2,128,1,float16,float16,0,0.0174346665541331
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,2,1,128,1,float16,fp8,0,0.0194560003777345
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,2,2,128,1,float16,fp8,0,0.022122666239738464
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,2,2,128,1,float16,float16,0,0.015861333658297855
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,2,2,128,1,float16,fp8,0,0.019424000134070713
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,2,1,128,1,float16,float16,0,0.015615999698638916
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,2,2,128,1,float16,float16,0,0.015791999797026317
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,2,2,128,1,float16,fp8,0,0.019653332730134327
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,2,2,128,1,float16,float16,0,0.015386667102575302
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,2,2,128,1,float16,fp8,0,0.019237333287795384
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,2,1,128,1,float16,float16,0,0.015322666615247726
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,2,1,128,1,float16,fp8,0,0.018805333723624546
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,2,2,128,1,float16,float16,0,0.015482666591803232
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,2,2,128,1,float16,fp8,0,0.019007999449968338
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,2,2,128,1,float16,float16,0,0.07492266595363617
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,2,1,128,1,float16,float16,0,0.07401066521803538
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,128,2,1,128,1,float16,fp8,0,0.07987200220425923
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,2,2,128,1,float16,fp8,0,0.06116800010204315
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,2,1,128,1,float16,float16,0,0.035504000882307686
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,2,1,128,1,float16,fp8,0,0.04012800008058548
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,2,2,128,1,float16,float16,0,0.07492800056934357
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,2,2,128,1,float16,fp8,0,0.056421334544817604
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,2,2,128,1,float16,float16,0,0.03409600009520849
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,2,2,128,1,float16,fp8,0,0.03561066587766012
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,2,1,128,1,float16,float16,0,0.02366400013367335
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,2,1,128,1,float16,fp8,0,0.026746665438016255
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,2,2,128,1,float16,float16,0,0.03428266694148382
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,2,2,128,1,float16,fp8,0,0.035642666121323906
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,2,1,128,1,float16,fp8,0,0.02256533255179723
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,2,2,128,1,float16,float16,0,0.023786666492621105
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,2,2,128,1,float16,fp8,0,0.027535999814669292
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,2,1,128,1,float16,float16,0,0.018960000326236088
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,2,2,128,1,float16,float16,0,0.02402133246262868
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,2,2,128,1,float16,fp8,0,0.027061333258946735
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,2,2,128,1,float16,float16,0,0.01886933296918869
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,2,2,128,1,float16,fp8,0,0.02219199885924657
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,2,1,128,1,float16,float16,0,0.016645333419243496
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,2,1,128,1,float16,fp8,0,0.019482667247454327
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,2,2,128,1,float16,float16,0,0.018922666708628338
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,2,2,128,1,float16,fp8,0,0.021989333132902782
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,2,2,128,1,float16,float16,0,0.016010666886965435
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,2,2,128,1,float16,fp8,0,0.018186666071414948
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,2,1,128,1,float16,float16,0,0.014645333091417948
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,2,2,128,1,float16,float16,0,0.01595199977358182
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,2,1,128,1,float16,fp8,0,0.01782400036851565
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,2,2,128,1,float16,fp8,0,0.01834133391578992
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,2,2,128,1,float16,float16,0,0.014101333916187286
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,2,2,128,1,float16,fp8,0,0.015930666277805965
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,2,1,128,1,float16,float16,0,0.013909333695967993
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,2,1,128,1,float16,fp8,0,0.01632000009218852
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,2,2,128,1,float16,float16,0,0.014080000420411428
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,2,2,128,1,float16,fp8,0,0.016341333587964375
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,2,2,128,1,float16,float16,0,0.013637332866589228
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,2,2,128,1,float16,fp8,0,0.016208000481128693
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,2,1,128,1,float16,float16,0,0.013663999736309052
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,2,1,128,1,float16,fp8,0,0.015754666179418564
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,2,2,128,1,float16,float16,0,0.01381333296497663
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,2,2,128,1,float16,fp8,0,0.015658666690190632
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,2,2,128,1,float16,float16,0,0.013461332768201828
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,2,2,128,1,float16,fp8,0,0.01595199977358182
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,2,1,128,1,float16,float16,0,0.013637332866589228
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,2,1,128,1,float16,fp8,0,0.016271999726692837
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,2,2,128,1,float16,float16,0,0.01333333303531011
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,2,2,128,1,float16,fp8,0,0.015381333728631338
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,2,2,128,1,float16,float16,0,0.035631999373435974
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,2,1,128,1,float16,float16,0,0.03551999976237615
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,2,2,128,1,float16,fp8,0,0.03532266616821289
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,64,2,1,128,1,float16,fp8,0,0.03955200066169103
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,2,1,128,1,float16,float16,0,0.02333866556485494
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,2,1,128,1,float16,fp8,0,0.02718399961789449
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,2,2,128,1,float16,float16,0,0.03568533311287562
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,2,2,128,1,float16,float16,0,0.022367998957633972
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,2,2,128,1,float16,fp8,0,0.03522666543722153
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,2,2,128,1,float16,fp8,0,0.023386667172114056
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,2,1,128,1,float16,float16,0,0.018922666708628338
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,2,1,128,1,float16,fp8,0,0.02248000105222066
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,2,2,128,1,float16,float16,0,0.022287999590237934
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,2,2,128,1,float16,fp8,0,0.023546665906906128
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,2,2,128,1,float16,float16,0,0.017093333105246227
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,2,2,128,1,float16,fp8,0,0.018426666657129925
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,2,1,128,1,float16,float16,0,0.016208000481128693
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,2,1,128,1,float16,fp8,0,0.018746666610240936
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,2,2,128,1,float16,float16,0,0.017029333859682083
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,2,2,128,1,float16,fp8,0,0.018426666657129925
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,2,2,128,1,float16,float16,0,0.014389333625634512
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,2,2,128,1,float16,fp8,0,0.015376000354687372
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,2,1,128,1,float16,float16,0,0.014287999520699183
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,2,1,128,1,float16,fp8,0,0.016677333662907284
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,2,2,128,1,float16,float16,0,0.014368000129858652
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,2,2,128,1,float16,fp8,0,0.01563199982047081
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,2,2,128,1,float16,float16,0,0.012543999900420507
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,2,2,128,1,float16,fp8,0,0.012981332838535309
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,2,1,128,1,float16,float16,0,0.014042666802803675
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,2,1,128,1,float16,fp8,0,0.016208000481128693
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,2,2,128,1,float16,float16,0,0.012495999534924826
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,2,2,128,1,float16,fp8,0,0.012869333227475485
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,2,2,128,1,float16,float16,0,0.012047999848922094
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,2,2,128,1,float16,fp8,0,0.012661332885424295
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,2,1,128,1,float16,float16,0,0.013343999783198038
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,2,1,128,1,float16,fp8,0,0.015546667079130808
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,2,2,128,1,float16,float16,0,0.0120319997270902
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,2,2,128,1,float16,fp8,0,0.012762666990359625
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,2,2,128,1,float16,float16,0,0.011546666423479715
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,2,2,128,1,float16,fp8,0,0.012245333443085352
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,2,1,128,1,float16,float16,0,0.013264000415802002
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,2,1,128,1,float16,fp8,0,0.01581866666674614
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,2,2,128,1,float16,float16,0,0.011349332829316458
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,2,2,128,1,float16,fp8,0,0.012165332833925882
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,2,2,128,1,float16,float16,0,0.011333333949247995
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,2,2,128,1,float16,fp8,0,0.012357333054145178
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,2,1,128,1,float16,float16,0,0.013269333789745966
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,2,1,128,1,float16,fp8,0,0.01545599972208341
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,2,2,128,1,float16,float16,0,0.01121066634853681
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,2,2,128,1,float16,fp8,0,0.012080000092585882
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,2,1,128,1,float16,float16,0,0.025397333006064098
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,2,2,128,1,float16,float16,0,0.02587199956178665
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,32,2,1,128,1,float16,fp8,0,0.026591998835404713
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,2,2,128,1,float16,fp8,0,0.027210667729377747
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,2,1,128,1,float16,float16,0,0.017279999951521557
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,2,1,128,1,float16,fp8,0,0.018757333358128864
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,2,2,128,1,float16,float16,0,0.02590399980545044
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,2,2,128,1,float16,fp8,0,0.027141332626342773
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,2,2,128,1,float16,float16,0,0.017488000293572743
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,2,2,128,1,float16,fp8,0,0.018725333114465077
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,2,1,128,1,float16,float16,0,0.014570667097965876
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,2,1,128,1,float16,fp8,0,0.01544533297419548
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,2,2,128,1,float16,float16,0,0.01738133281469345
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,2,2,128,1,float16,fp8,0,0.018906666586796444
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,2,2,128,1,float16,float16,0,0.014463999619086584
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,2,2,128,1,float16,fp8,0,0.015466666469971338
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,2,1,128,1,float16,float16,0,0.012991999586423239
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,2,1,128,1,float16,fp8,0,0.013898666948080063
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,2,2,128,1,float16,float16,0,0.014426667243242264
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,2,2,128,1,float16,fp8,0,0.01522133375207583
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,2,2,128,1,float16,float16,0,0.013194666554530462
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,2,2,128,1,float16,fp8,0,0.013562666873137156
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,2,1,128,1,float16,float16,0,0.01239466667175293
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,2,1,128,1,float16,fp8,0,0.012624000509579977
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,2,2,128,1,float16,float16,0,0.013088000317414602
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,2,2,128,1,float16,fp8,0,0.013455999394257864
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,2,2,128,1,float16,float16,0,0.011957333733638128
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,2,2,128,1,float16,fp8,0,0.013002666334311167
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,2,1,128,1,float16,float16,0,0.011855999628702799
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,2,1,128,1,float16,fp8,0,0.012378666549921036
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,2,2,128,1,float16,float16,0,0.01211200033624967
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,2,2,128,1,float16,fp8,0,0.01331199953953425
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,2,2,128,1,float16,float16,0,0.01179733375708262
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,2,2,128,1,float16,fp8,0,0.01239466667175293
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,2,1,128,1,float16,fp8,0,0.012245333443085352
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,2,1,128,1,float16,float16,0,0.011648000528415045
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,2,2,128,1,float16,float16,0,0.011690666278203329
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,2,2,128,1,float16,fp8,0,0.01251199965675672
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,2,2,128,1,float16,float16,0,0.011301333705584208
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,2,2,128,1,float16,fp8,0,0.012015999605258306
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,2,1,128,1,float16,float16,0,0.011354666203260422
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,2,1,128,1,float16,fp8,0,0.012005332857370377
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,2,2,128,1,float16,float16,0,0.011407999942700068
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,2,2,128,1,float16,fp8,0,0.012437333663304647
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,2,2,128,1,float16,float16,0,0.01137599969903628
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,2,2,128,1,float16,fp8,0,0.011760000139474869
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,2,1,128,1,float16,float16,0,0.011328000575304031
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,2,1,128,1,float16,fp8,0,0.011989332735538483
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,2,2,128,1,float16,float16,0,0.011237333218256632
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,2,2,128,1,float16,fp8,0,0.011920000116030375
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,2,2,128,1,float16,float16,0,0.02093333254257838
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,2,1,128,1,float16,float16,0,0.021397332350413006
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,2,2,128,1,float16,fp8,0,0.022517333428064983
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,256,16,2,1,128,1,float16,fp8,0,0.022810667753219604
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,2,1,128,1,float16,float16,0,0.015135999768972397
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,2,1,128,1,float16,fp8,0,0.01603200038274129
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,2,2,128,1,float16,float16,0,0.021151999632517498
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,2,2,128,1,float16,float16,0,0.014720000326633453
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,2,1,128,1,float16,fp8,0,0.014149333039919535
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,2,2,128,1,float16,fp8,0,0.022069332500298817
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,2,2,128,1,float16,fp8,0,0.015728000551462173
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,2,1,128,1,float16,float16,0,0.013434667140245438
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,2,2,128,1,float16,float16,0,0.01481066644191742
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,2,2,128,1,float16,fp8,0,0.015856000284353893
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,2,2,128,1,float16,float16,0,0.013248000293970108
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,2,2,128,1,float16,fp8,0,0.013503999759753546
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,2,1,128,1,float16,float16,0,0.012752000242471695
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,2,1,128,1,float16,fp8,0,0.013455999394257864
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,2,2,128,1,float16,float16,0,0.013301332791646322
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,2,2,128,1,float16,fp8,0,0.013749333719412485
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,2,2,128,1,float16,float16,0,0.012437333663304647
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,2,2,128,1,float16,fp8,0,0.013354666531085968
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,2,1,128,1,float16,float16,0,0.012085333466529846
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,2,1,128,1,float16,fp8,0,0.012703999876976013
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,2,2,128,1,float16,float16,0,0.012565333396196365
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,2,2,128,1,float16,fp8,0,0.013445333888133367
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,2,2,128,1,float16,float16,0,0.012058666596810022
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,2,2,128,1,float16,fp8,0,0.01257066677014033
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,2,1,128,1,float16,float16,0,0.0116799995303154
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,2,1,128,1,float16,fp8,0,0.01246400053302447
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,2,2,128,1,float16,float16,0,0.012256000190973282
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,2,2,128,1,float16,fp8,0,0.012784000486135483
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,2,2,128,1,float16,float16,0,0.011626667032639185
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,2,2,128,1,float16,fp8,0,0.012186666329701742
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,2,1,128,1,float16,float16,0,0.011434666812419891
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,2,1,128,1,float16,fp8,0,0.011770666887362799
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,2,2,128,1,float16,float16,0,0.011648000528415045
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,2,2,128,1,float16,fp8,0,0.012170666207869848
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,2,2,128,1,float16,float16,0,0.011296000331640244
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,2,2,128,1,float16,fp8,0,0.011978667229413986
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,2,1,128,1,float16,float16,0,0.011594666788975397
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,2,1,128,1,float16,fp8,0,0.011920000116030375
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,2,2,128,1,float16,float16,0,0.01116266722480456
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,2,2,128,1,float16,fp8,0,0.011813333878914515
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,2,2,128,1,float16,float16,0,0.011055999745925268
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,2,2,128,1,float16,fp8,0,0.011754666765530905
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,2,1,128,1,float16,float16,0,0.011055999745925268
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,2,1,128,1,float16,fp8,0,0.01192533348997434
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,2,2,128,1,float16,float16,0,0.011477333803971609
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,2,2,128,1,float16,fp8,0,0.011871999750534693
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,1,1,128,1,float16,float16,0,0.6231840054194132
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,1,1,128,1,float16,fp8,0,0.650325338045756
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,1,1,128,1,float16,float16,0,0.620954672495524
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,1,1,128,1,float16,float16,0,0.38091198603312176
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16384,1,1,128,1,float16,fp8,0,0.6509759823481241
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,1,1,128,1,float16,float16,0,0.3810826539993286
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,1,1,128,1,float16,fp8,0,0.40165865421295166
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16384,1,1,128,1,float16,fp8,0,0.3941546678543091
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,1,1,128,1,float16,fp8,0,0.24458134174346924
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,1,1,128,1,float16,float16,0,0.229802668094635
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,1,1,128,1,float16,fp8,0,0.2435893416404724
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16384,1,1,128,1,float16,float16,0,0.22925333182017008
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,1,1,128,1,float16,float16,0,0.4130773146947225
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,1,1,128,1,float16,fp8,0,0.3851413329442342
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,1,1,128,1,float16,float16,0,0.41174932320912677
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,12288,1,1,128,1,float16,fp8,0,0.3845280011494954
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,1,1,128,1,float16,float16,0,0.25110934178034466
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,1,1,128,1,float16,fp8,0,0.22645866870880127
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,1,1,128,1,float16,float16,0,0.250602662563324
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,12288,1,1,128,1,float16,fp8,0,0.22913599014282227
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,1,1,128,1,float16,float16,0,0.17924267053604126
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,1,1,128,1,float16,fp8,0,0.16683199008305868
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,1,1,128,1,float16,float16,0,0.17937066157658896
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,12288,1,1,128,1,float16,fp8,0,0.16793066263198853
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,1,1,128,1,float16,float16,0,0.3142506678899129
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,1,1,128,1,float16,fp8,0,0.2953919967015584
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,1,1,128,1,float16,fp8,0,0.18014399210611978
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,1,1,128,1,float16,float16,0,0.3184586763381958
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,10240,1,1,128,1,float16,fp8,0,0.29870933294296265
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,1,1,128,1,float16,float16,0,0.18959466616312662
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,1,1,128,1,float16,float16,0,0.1916373372077942
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,1,1,128,1,float16,float16,0,0.14857600132624307
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,10240,1,1,128,1,float16,fp8,0,0.18052266041437784
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,1,1,128,1,float16,fp8,0,0.1434879998366038
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,1,1,128,1,float16,float16,0,0.15043200055758157
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,10240,1,1,128,1,float16,fp8,0,0.1418826679388682
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,1,1,128,1,float16,float16,0,0.38045867284138996
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,1,1,128,1,float16,fp8,0,0.3627520004908244
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,1,1,128,1,float16,float16,0,0.38291200002034503
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,8192,1,1,128,1,float16,fp8,0,0.362064003944397
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,1,1,128,1,float16,float16,0,0.23334934314092
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,1,1,128,1,float16,fp8,0,0.21592533588409424
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,1,1,128,1,float16,float16,0,0.23388266563415527
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,8192,1,1,128,1,float16,fp8,0,0.21567465861638388
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,1,1,128,1,float16,float16,0,0.13276799519856772
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,1,1,128,1,float16,fp8,0,0.1272640029589335
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,1,1,128,1,float16,float16,0,0.1328266660372416
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,8192,1,1,128,1,float16,fp8,0,0.12718933820724487
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,1,1,128,1,float16,float16,0,0.1225920021533966
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,1,1,128,1,float16,fp8,0,0.11857600013415019
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,1,1,128,1,float16,float16,0,0.12197333574295044
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,8192,1,1,128,1,float16,fp8,0,0.11786666512489319
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,1,1,128,1,float16,float16,0,0.2516480088233948
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,1,1,128,1,float16,fp8,0,0.24259734153747559
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,1,1,128,1,float16,float16,0,0.2529333432515462
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,6144,1,1,128,1,float16,fp8,0,0.24156266450881958
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,1,1,128,1,float16,float16,0,0.15132266283035278
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,1,1,128,1,float16,fp8,0,0.14434132973353067
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,1,1,128,1,float16,float16,0,0.15079999963442484
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,6144,1,1,128,1,float16,fp8,0,0.1437333325544993
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,1,1,128,1,float16,float16,0,0.10170132915178935
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,1,1,128,1,float16,fp8,0,0.10018133123715718
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,1,1,128,1,float16,float16,0,0.10154133041699727
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,6144,1,1,128,1,float16,fp8,0,0.09987733761469524
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,1,1,128,1,float16,float16,0,0.09493333101272583
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,1,1,128,1,float16,fp8,0,0.0929813285668691
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,1,1,128,1,float16,float16,0,0.09458133578300476
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,6144,1,1,128,1,float16,fp8,0,0.09262399872144063
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,1,1,128,1,float16,float16,0,0.24833599726359049
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,1,1,128,1,float16,fp8,0,0.2444480061531067
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,1,1,128,1,float16,float16,0,0.24849067131678262
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,1,1,128,1,float16,float16,0,0.14706666270891824
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,4096,1,1,128,1,float16,fp8,0,0.2442400058110555
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,1,1,128,1,float16,float16,0,0.1487573285897573
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,1,1,128,1,float16,fp8,0,0.1428000032901764
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,4096,1,1,128,1,float16,fp8,0,0.14221333463986716
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,1,1,128,1,float16,float16,0,0.08040533463160197
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,1,1,128,1,float16,fp8,0,0.08096000055472057
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,1,1,128,1,float16,float16,0,0.07983999947706859
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,1,1,128,1,float16,fp8,0,0.07283733288447063
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,4096,1,1,128,1,float16,fp8,0,0.08177066842714946
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,1,1,128,1,float16,float16,0,0.07125866909821828
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,1,1,128,1,float16,fp8,0,0.07313600182533264
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,4096,1,1,128,1,float16,float16,0,0.07193066676457723
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,1,1,128,1,float16,float16,0,0.06730666756629944
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,1,1,128,1,float16,fp8,0,0.06759466727574666
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,1,1,128,1,float16,float16,0,0.06702400247255962
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,4096,1,1,128,1,float16,fp8,0,0.06739200154940288
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,1,1,128,1,float16,float16,0,0.1722453236579895
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,1,1,128,1,float16,fp8,0,0.1721173326174418
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,1,1,128,1,float16,float16,0,0.1723680098851522
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,1,1,128,1,float16,float16,0,0.09903466701507568
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,3072,1,1,128,1,float16,fp8,0,0.17404800653457642
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,1,1,128,1,float16,fp8,0,0.09830400347709656
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,1,1,128,1,float16,float16,0,0.09794666369756062
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,1,1,128,1,float16,float16,0,0.06313066681226094
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,3072,1,1,128,1,float16,fp8,0,0.09895466764767964
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,1,1,128,1,float16,fp8,0,0.05951466659704844
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,1,1,128,1,float16,fp8,0,0.0660159985224406
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,1,1,128,1,float16,float16,0,0.06389333307743073
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,3072,1,1,128,1,float16,fp8,0,0.0670666644970576
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,1,1,128,1,float16,float16,0,0.057114665706952415
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,1,1,128,1,float16,float16,0,0.0574186642964681
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,3072,1,1,128,1,float16,fp8,0,0.059279998143514
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,1,1,128,1,float16,float16,0,0.053743998209635414
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,1,1,128,1,float16,fp8,0,0.05529599885145823
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,1,1,128,1,float16,float16,0,0.05366933345794678
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,3072,1,1,128,1,float16,fp8,0,0.056287998954455055
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,1,1,128,1,float16,float16,0,0.17996267477671304
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,1,1,128,1,float16,fp8,0,0.1853813330332438
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,1,1,128,1,float16,float16,0,0.1800640026728312
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,2048,1,1,128,1,float16,fp8,0,0.18505066633224487
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,1,1,128,1,float16,float16,0,0.1037546694278717
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,1,1,128,1,float16,fp8,0,0.10301333665847778
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,1,1,128,1,float16,float16,0,0.10441066821416219
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,2048,1,1,128,1,float16,fp8,0,0.10309333602587382
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,1,1,128,1,float16,float16,0,0.05529599885145823
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,1,1,128,1,float16,fp8,0,0.05684266487757365
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,1,1,128,1,float16,float16,0,0.054661333560943604
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,2048,1,1,128,1,float16,fp8,0,0.05682666599750519
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,1,1,128,1,float16,fp8,0,0.04877333343029022
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,1,1,128,1,float16,float16,0,0.04604266583919525
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,1,1,128,1,float16,fp8,0,0.04886933167775472
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,2048,1,1,128,1,float16,float16,0,0.046298667788505554
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,1,1,128,1,float16,float16,0,0.042064001162846885
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,1,1,128,1,float16,fp8,0,0.03995733211437861
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,1,1,128,1,float16,fp8,0,0.04403733213742574
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,1,1,128,1,float16,float16,0,0.04187199970086416
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,2048,1,1,128,1,float16,fp8,0,0.04452799757321676
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,1,1,128,1,float16,float16,0,0.12818132837613425
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,1,1,128,1,float16,float16,0,0.03904533386230469
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,1,1,128,1,float16,float16,0,0.03934400031963984
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,2048,1,1,128,1,float16,fp8,0,0.04009066770474116
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,1,1,128,1,float16,fp8,0,0.13671466708183289
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,1,1,128,1,float16,float16,0,0.1285599966843923
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1536,1,1,128,1,float16,fp8,0,0.13613333304723105
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,1,1,128,1,float16,float16,0,0.0712960014740626
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,1,1,128,1,float16,fp8,0,0.07389866809050243
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,1,1,128,1,float16,float16,0,0.07283733288447063
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1536,1,1,128,1,float16,fp8,0,0.0739519993464152
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,1,1,128,1,float16,float16,0,0.04397333165009817
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,1,1,128,1,float16,fp8,0,0.04920533299446106
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,1,1,128,1,float16,float16,0,0.04407466451327006
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1536,1,1,128,1,float16,fp8,0,0.04909333089987437
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,1,1,128,1,float16,float16,0,0.03811733424663544
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,1,1,128,1,float16,fp8,0,0.04228800038496653
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,1,1,128,1,float16,float16,0,0.03817066550254822
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1536,1,1,128,1,float16,fp8,0,0.04301866888999939
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,1,1,128,1,float16,float16,0,0.03478399912516276
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,1,1,128,1,float16,fp8,0,0.038549333810806274
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,1,1,128,1,float16,float16,0,0.034448000291983284
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1536,1,1,128,1,float16,fp8,0,0.03857066730658213
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,1,1,128,1,float16,float16,0,0.03277866790692011
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,1,1,128,1,float16,fp8,0,0.035274667044480644
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,1,1,128,1,float16,float16,0,0.032655999064445496
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1536,1,1,128,1,float16,fp8,0,0.03568000098069509
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,1,1,128,1,float16,float16,0,0.14522666732470194
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,1,1,128,1,float16,fp8,0,0.15870400269826254
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,1,1,128,1,float16,float16,0,0.14595199624697366
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,1,1,128,1,float16,float16,0,0.08334400256474812
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,1,1,128,1,float16,float16,0,0.04159999887148539
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,1024,1,1,128,1,float16,fp8,0,0.15890133380889893
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,1,1,128,1,float16,fp8,0,0.08798933029174805
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,1,1,128,1,float16,float16,0,0.08340266346931458
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,1024,1,1,128,1,float16,fp8,0,0.08749333024024963
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,1,1,128,1,float16,fp8,0,0.04737600187460581
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,1,1,128,1,float16,float16,0,0.04228266576925913
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,1024,1,1,128,1,float16,fp8,0,0.04684799909591675
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,1,1,128,1,float16,float16,0,0.033530667424201965
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,1,1,128,1,float16,fp8,0,0.03411199897527695
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,1,1,128,1,float16,float16,0,0.029194665451844532
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,1,1,128,1,float16,fp8,0,0.03872533390919367
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,1,1,128,1,float16,float16,0,0.0336053321758906
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,1024,1,1,128,1,float16,fp8,0,0.038986665507157646
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,1,1,128,1,float16,float16,0,0.029504001140594482
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,1024,1,1,128,1,float16,fp8,0,0.034154665966828666
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,1,1,128,1,float16,float16,0,0.026752000053723652
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,1,1,128,1,float16,fp8,0,0.030421334008375805
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,1,1,128,1,float16,float16,0,0.026608000199000042
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,1024,1,1,128,1,float16,fp8,0,0.030437332888444264
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,1,1,128,1,float16,float16,0,0.02492266645034154
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,1,1,128,1,float16,fp8,0,0.02861333390076955
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,1,1,128,1,float16,float16,0,0.025386666258176167
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,1024,1,1,128,1,float16,fp8,0,0.028970666229724884
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,1,1,128,1,float16,float16,0,0.12116266290346782
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,1,1,128,1,float16,fp8,0,0.14008532961209616
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,1,1,128,1,float16,float16,0,0.12088533242543538
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,1,1,128,1,float16,float16,0,0.06865066786607106
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,512,1,1,128,1,float16,fp8,0,0.13987200458844504
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,1,1,128,1,float16,fp8,0,0.07545066873232524
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,1,1,128,1,float16,float16,0,0.06846400101979573
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,512,1,1,128,1,float16,fp8,0,0.07351466516653697
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,1,1,128,1,float16,float16,0,0.03465600063403448
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,1,1,128,1,float16,fp8,0,0.041573333243529
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,1,1,128,1,float16,float16,0,0.03512533257404963
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,512,1,1,128,1,float16,fp8,0,0.04200533529122671
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,1,1,128,1,float16,float16,0,0.027002667387326557
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,1,1,128,1,float16,fp8,0,0.033557333052158356
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,1,1,128,1,float16,float16,0,0.027509334186712902
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,512,1,1,128,1,float16,fp8,0,0.03383466601371765
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,1,1,128,1,float16,fp8,0,0.029733332494894665
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,1,1,128,1,float16,float16,0,0.02314666658639908
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,1,1,128,1,float16,fp8,0,0.029733332494894665
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,512,1,1,128,1,float16,float16,0,0.023226665953795116
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,1,1,128,1,float16,float16,0,0.020618667205174763
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,1,1,128,1,float16,fp8,0,0.025392000873883564
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,1,1,128,1,float16,float16,0,0.02049066623051961
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,512,1,1,128,1,float16,fp8,0,0.025306666890780132
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,1,1,128,1,float16,float16,0,0.018954666952292126
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,1,1,128,1,float16,fp8,0,0.023962666591008503
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,1,1,128,1,float16,fp8,0,0.02327999969323476
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,1,1,128,1,float16,float16,0,0.018901333212852478
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,512,1,1,128,1,float16,fp8,0,0.02380799998839696
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,1,1,128,1,float16,float16,0,0.01869333287080129
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,1,1,128,1,float16,float16,0,0.018186666071414948
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,512,1,1,128,1,float16,fp8,0,0.022917332748572033
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,1,1,128,1,float16,float16,0,0.06312533219655354
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,1,1,128,1,float16,fp8,0,0.06554666658242543
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,1,1,128,1,float16,float16,0,0.06242666641871134
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,1,1,128,1,float16,float16,0,0.03257599969704946
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,256,1,1,128,1,float16,fp8,0,0.06600533425807953
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,1,1,128,1,float16,fp8,0,0.03882133215665817
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,1,1,128,1,float16,float16,0,0.032085334261258446
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,256,1,1,128,1,float16,fp8,0,0.03875733415285746
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,1,1,128,1,float16,float16,0,0.02438933402299881
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,1,1,128,1,float16,fp8,0,0.03108799954255422
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,1,1,128,1,float16,float16,0,0.024400000770886738
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,256,1,1,128,1,float16,fp8,0,0.03035199890534083
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,1,1,128,1,float16,float16,0,0.020362666497627895
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,1,1,128,1,float16,fp8,0,0.02659733345111211
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,1,1,128,1,float16,float16,0,0.0200853335360686
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,256,1,1,128,1,float16,fp8,0,0.026672000686327618
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,1,1,128,1,float16,float16,0,0.01746133342385292
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,1,1,128,1,float16,fp8,0,0.021498667697111767
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,1,1,128,1,float16,float16,0,0.017594666530688603
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,256,1,1,128,1,float16,fp8,0,0.0216799999276797
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,1,1,128,1,float16,float16,0,0.016303999970356624
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,1,1,128,1,float16,fp8,0,0.019738666713237762
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,1,1,128,1,float16,float16,0,0.01605333387851715
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,256,1,1,128,1,float16,fp8,0,0.019962667177120846
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,1,1,128,1,float16,float16,0,0.015509333461523056
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,1,1,128,1,float16,fp8,0,0.01924266666173935
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,1,1,128,1,float16,float16,0,0.015381333728631338
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,256,1,1,128,1,float16,fp8,0,0.01904533306757609
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,1,1,128,1,float16,float16,0,0.015386667102575302
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,1,1,128,1,float16,fp8,0,0.018863999595244724
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,1,1,128,1,float16,float16,0,0.015311999867359797
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,256,1,1,128,1,float16,fp8,0,0.019002666076024372
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,1,1,128,1,float16,float16,0,0.03084266682465871
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,1,1,128,1,float16,fp8,0,0.0349386657277743
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,1,1,128,1,float16,float16,0,0.03084266682465871
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,128,1,1,128,1,float16,fp8,0,0.03429866582155228
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,1,1,128,1,float16,float16,0,0.022848000129063923
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,1,1,128,1,float16,fp8,0,0.026261332134405773
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,1,1,128,1,float16,float16,0,0.02274133265018463
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,128,1,1,128,1,float16,fp8,0,0.02610666553179423
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,1,1,128,1,float16,float16,0,0.01887999971707662
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,1,1,128,1,float16,fp8,0,0.022266666094462078
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,1,1,128,1,float16,float16,0,0.018805333723624546
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,128,1,1,128,1,float16,fp8,0,0.02205866575241089
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,1,1,128,1,float16,float16,0,0.015840000162522
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,1,1,128,1,float16,fp8,0,0.018016000588734944
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,1,1,128,1,float16,float16,0,0.01569066693385442
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,128,1,1,128,1,float16,fp8,0,0.01823466643691063
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,1,1,128,1,float16,float16,0,0.01434133326013883
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,1,1,128,1,float16,fp8,0,0.01664000004529953
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,1,1,128,1,float16,float16,0,0.014373333503802618
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,128,1,1,128,1,float16,fp8,0,0.01635733370979627
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,1,1,128,1,float16,float16,0,0.013797332843144735
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,1,1,128,1,float16,fp8,0,0.015674666812022526
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,1,1,128,1,float16,float16,0,0.013823999712864557
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,128,1,1,128,1,float16,fp8,0,0.015989333391189575
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,1,1,128,1,float16,float16,0,0.013381333400805792
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,1,1,128,1,float16,fp8,0,0.015754666179418564
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,1,1,128,1,float16,float16,0,0.013418667018413544
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,128,1,1,128,1,float16,fp8,0,0.015599999576807022
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,1,1,128,1,float16,float16,0,0.01333333303531011
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,1,1,128,1,float16,fp8,0,0.015311999867359797
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,1,1,128,1,float16,float16,0,0.013525333255529404
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,128,1,1,128,1,float16,fp8,0,0.015872000406185787
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,1,1,128,1,float16,float16,0,0.021322667598724365
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,1,1,128,1,float16,fp8,0,0.023221333821614582
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,1,1,128,1,float16,float16,0,0.021269333859284718
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,64,1,1,128,1,float16,fp8,0,0.0227360005180041
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,1,1,128,1,float16,float16,0,0.01718933383623759
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,1,1,128,1,float16,fp8,0,0.015429332852363586
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,1,1,128,1,float16,float16,0,0.014303999642531076
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,1,1,128,1,float16,fp8,0,0.01850133389234543
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,1,1,128,1,float16,float16,0,0.016890666137139004
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,64,1,1,128,1,float16,fp8,0,0.018735999862353008
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,1,1,128,1,float16,float16,0,0.014645333091417948
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,64,1,1,128,1,float16,fp8,0,0.015050667027632395
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,1,1,128,1,float16,float16,0,0.012645332763592402
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,1,1,128,1,float16,fp8,0,0.013471999516089758
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,1,1,128,1,float16,float16,0,0.012698666503032049
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,64,1,1,128,1,float16,fp8,0,0.01322666679819425
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,1,1,128,1,float16,float16,0,0.011946666985750198
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,1,1,128,1,float16,float16,0,0.011407999942700068
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,1,1,128,1,float16,fp8,0,0.012730666746695837
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,1,1,128,1,float16,float16,0,0.012298667182525
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,64,1,1,128,1,float16,fp8,0,0.01251199965675672
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,1,1,128,1,float16,float16,0,0.011445333560307821
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,1,1,128,1,float16,fp8,0,0.012416000167528788
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,64,1,1,128,1,float16,fp8,0,0.012144000579913458
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,1,1,128,1,float16,float16,0,0.011365332951148352
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,1,1,128,1,float16,fp8,0,0.012181332955757776
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,1,1,128,1,float16,float16,0,0.011450666934251785
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,64,1,1,128,1,float16,fp8,0,0.01210133358836174
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,1,1,128,1,float16,float16,0,0.011157333850860596
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,1,1,128,1,float16,fp8,0,0.012389333297808966
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,1,1,128,1,float16,float16,0,0.011114666859308878
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,64,1,1,128,1,float16,fp8,0,0.012165332833925882
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,1,1,128,1,float16,float16,0,0.017322666943073273
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,1,1,128,1,float16,fp8,0,0.01887999971707662
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,1,1,128,1,float16,float16,0,0.017263999829689663
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,1,1,128,1,float16,float16,0,0.01462399959564209
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,32,1,1,128,1,float16,fp8,0,0.018650667121013004
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,1,1,128,1,float16,fp8,0,0.01551466683546702
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,1,1,128,1,float16,float16,0,0.014501333236694336
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,32,1,1,128,1,float16,fp8,0,0.01543466622630755
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,1,1,128,1,float16,float16,0,0.013546666751305262
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,1,1,128,1,float16,fp8,0,0.01360000049074491
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,1,1,128,1,float16,float16,0,0.01351999988158544
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,1,1,128,1,float16,float16,0,0.012682666381200155
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,32,1,1,128,1,float16,fp8,0,0.013888000200192133
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,1,1,128,1,float16,float16,0,0.012351999680201212
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,1,1,128,1,float16,fp8,0,0.012624000509579977
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,32,1,1,128,1,float16,fp8,0,0.013114667187134424
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,1,1,128,1,float16,float16,0,0.01184533288081487
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,1,1,128,1,float16,fp8,0,0.012458667159080505
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,1,1,128,1,float16,float16,0,0.011962667107582092
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,32,1,1,128,1,float16,fp8,0,0.012671999633312225
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,1,1,128,1,float16,float16,0,0.011424000064531961
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,1,1,128,1,float16,fp8,0,0.012335999558369318
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,1,1,128,1,float16,float16,0,0.011450666934251785
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,32,1,1,128,1,float16,fp8,0,0.012047999848922094
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,1,1,128,1,float16,float16,0,0.011482667177915573
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,1,1,128,1,float16,fp8,0,0.011813333878914515
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,1,1,128,1,float16,float16,0,0.011141333729028702
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,32,1,1,128,1,float16,fp8,0,0.01221866657336553
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,1,1,128,1,float16,float16,0,0.011045332998037338
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,1,1,128,1,float16,fp8,0,0.011706666400035223
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,1,1,128,1,float16,float16,0,0.011178666104873022
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,1,1,128,1,float16,fp8,0,0.016143999993801117
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,32,1,1,128,1,float16,fp8,0,0.01180800050497055
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,1,1,128,1,float16,float16,0,0.015087999403476715
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,1,1,128,1,float16,float16,0,0.014959999670584997
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,1,1,128,1,float16,float16,0,0.013541333377361298
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,128,16,1,1,128,1,float16,fp8,0,0.01599466676513354
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,1,1,128,1,float16,fp8,0,0.013951999445756277
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,1,1,128,1,float16,fp8,0,0.013967999567588171
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,1,1,128,1,float16,float16,0,0.013434667140245438
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,64,16,1,1,128,1,float16,fp8,0,0.013893333574136099
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,1,1,128,1,float16,float16,0,0.012613333761692047
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,1,1,128,1,float16,float16,0,0.012703999876976013
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,1,1,128,1,float16,fp8,0,0.012639999389648438
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,32,16,1,1,128,1,float16,fp8,0,0.012960000584522883
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,1,1,128,1,float16,float16,0,0.012245333443085352
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,1,1,128,1,float16,fp8,0,0.012938667088747025
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,16,16,1,1,128,1,float16,float16,0,0.012378666549921036
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,1,1,128,1,float16,float16,0,0.011877333124478659
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,1,1,128,1,float16,fp8,0,0.012362666428089142
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,1,1,128,1,float16,fp8,0,0.012634667257467905
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,8,16,1,1,128,1,float16,float16,0,0.011744000017642975
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,1,1,128,1,float16,float16,0,0.011258666714032492
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,1,1,128,1,float16,fp8,0,0.01210133358836174
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,1,1,128,1,float16,float16,0,0.011589333415031433
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,4,16,1,1,128,1,float16,fp8,0,0.01209066684047381
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,1,1,128,1,float16,float16,0,0.011258666714032492
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,1,1,128,1,float16,fp8,0,0.011749333391586939
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,1,1,128,1,float16,float16,0,0.011296000331640244
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,2,16,1,1,128,1,float16,fp8,0,0.011802667131026586
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,1,1,128,1,float16,float16,0,0.01097600037852923
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,1,1,128,1,float16,fp8,0,0.011909333368142446
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,1,1,128,1,float16,float16,0,0.01138666644692421
VLLM,0.12.0,NVIDIA H200,context_attention,vllm_flash_attn,1,16,1,1,128,1,float16,fp8,0,0.01180800050497055
