framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,1,16,1,0,0.1018933355808258
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,1,16,16,0,0.09990400075912476
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,1,16,16,0,0.2262666622797648
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,1,16,32,0,0.09934399525324504
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,1,16,32,0,0.222271998723348
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,1,16,64,0,0.1553813318411509
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,1,16,64,0,0.2734559973080953
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,1,16,128,0,0.15518400073051453
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,1,16,128,0,0.2050559918085734
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,2,16,1,0,0.09824533263842265
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,2,16,1,0,0.1356160044670105
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,2,16,2,0,0.09821333487828572
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,2,16,2,0,0.22157333294550577
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,1,16,1,0,0.1639840006828308
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,2,16,4,0,0.09880533814430237
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,2,16,8,0,0.09990933537483215
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,2,16,4,0,0.17504000663757324
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,2,16,8,0,0.2241333325703939
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,2,16,16,0,0.10652266939481099
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,2,16,16,0,0.13731200496355692
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,2,16,32,0,0.10341333349545796
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,2,16,32,0,0.2242400050163269
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,2,16,64,0,0.1586079994837443
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,2,16,64,0,0.22214933236440024
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,2,16,128,0,0.16617600123087564
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,2,16,128,0,0.19742933909098306
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,4,16,1,0,0.10392533739407857
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,4,16,1,0,0.13922133048375449
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,4,16,2,0,0.13317867120107016
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,4,16,2,0,0.1532960037390391
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,4,16,4,0,0.10348266363143921
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,4,16,4,0,0.13779200116793314
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,4,16,8,0,0.0997920036315918
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,4,16,8,0,0.1383946637312571
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,4,16,16,0,0.11994133392969768
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,4,16,16,0,0.22549867630004883
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,4,16,32,0,0.15687466661135355
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,4,16,32,0,0.22411733865737915
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,4,16,64,0,0.2209440072377523
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,4,16,64,0,0.157258669535319
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,4,16,128,0,0.1592586636543274
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,4,16,128,0,0.20416533946990967
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,8,16,1,0,0.10345600048700969
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,8,16,1,0,0.14831466476122537
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,8,16,2,0,0.16713599363962808
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,8,16,2,0,0.15640532970428467
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,8,16,4,0,0.13917866349220276
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,8,16,4,0,0.19261866807937622
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,8,16,8,0,0.15733866890271506
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,8,16,8,0,0.21826666593551636
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,1,16,8,0,0.10961600144704182
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,8,16,16,0,0.1564906636873881
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,8,16,16,0,0.2346293330192566
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,8,16,32,0,0.22154132525126138
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,8,16,32,0,0.1427893340587616
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,8,16,64,0,0.16022400061289468
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,8,16,64,0,0.22408533096313477
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,8,16,128,0,0.16659200191497803
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,1,16,2,0,0.1625386675198873
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,8,16,128,0,0.13117866714795431
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,16,16,1,0,0.10534399747848511
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,16,16,1,0,0.24026666084925333
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,16,16,2,0,0.15409599741299948
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,16,16,2,0,0.1604586640993754
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,16,16,4,0,0.15899200240770975
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,16,16,4,0,0.14853866895039877
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,16,16,8,0,0.15956800182660422
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,16,16,16,0,0.1556533376375834
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,16,16,8,0,0.16461333632469177
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,16,16,16,0,0.22665599981943765
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,16,16,32,0,0.13375999530156454
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,16,16,32,0,0.23774399360020956
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,16,16,64,0,0.125791996717453
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,16,16,64,0,0.224506676197052
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,16,16,128,0,0.1667626698811849
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,16,16,128,0,0.14849066734313965
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,1,16,8,0,0.22342934211095175
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,32,16,1,0,0.1630400021870931
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,32,16,2,0,0.12325867017110188
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,32,16,2,0,0.24733332792917886
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,32,16,1,0,0.4105653365453084
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,32,16,4,0,0.21919999519983926
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,32,16,4,0,0.15938133001327515
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,32,16,8,0,0.13106133540471396
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,32,16,8,0,0.17906665802001953
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,32,16,16,0,0.23030932744344076
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,32,16,16,0,0.1628320018450419
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,32,16,32,0,0.16044800480206808
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,32,16,32,0,0.23597333828608194
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,32,16,64,0,0.2169439991315206
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,32,16,128,0,0.15690132975578308
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,32,16,64,0,0.2266719937324524
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,32,16,128,0,0.14519466956456503
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,64,16,2,0,0.1694399913152059
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,64,16,2,0,0.4673706690470378
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,64,16,4,0,0.10932266712188721
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,64,16,4,0,0.3031839927037557
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,64,16,8,0,0.16321600476900736
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,64,16,8,0,0.22223466634750366
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,64,16,1,0,0.776634693145752
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,64,16,1,0,0.2905386686325073
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,64,16,16,0,0.16060800353686014
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,64,16,16,0,0.19065600633621216
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,64,16,32,0,0.10167466600735982
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,64,16,32,0,0.2379093368848165
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,64,16,64,0,0.15821333726247153
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,64,16,64,0,0.16165332992871603
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,64,16,128,0,0.12083733081817627
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,64,16,128,0,0.21186665693918863
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,1,16,2,0,0.14617066582043967
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,1,16,4,0,0.10416000088055928
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,1,16,4,0,0.5296586751937866
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,128,16,2,0,0.2873706618944804
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,128,16,2,0,0.8963519732157389
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,128,16,1,0,0.5344746510187784
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,128,16,1,0,1.5175573031107585
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,128,16,4,0,0.1606773336728414
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,128,16,16,0,0.1591093341509501
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,128,16,4,0,0.5804640054702759
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,128,16,8,0,0.17411200205485025
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,128,16,8,0,0.4087306658426921
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,128,16,32,0,0.3102719982465108
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,128,16,32,0,0.1691946585973104
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,128,16,16,0,0.3396746714909871
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,128,16,64,0,0.15996799866358438
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,128,16,64,0,0.25935999552408856
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,128,16,128,0,0.23718933264414468
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,128,16,128,0,0.10707199573516846
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,256,16,2,0,0.5233866771062216
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,256,16,4,0,0.2939466635386149
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,256,16,2,0,1.7342185974121094
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,256,16,1,0,0.985578695933024
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,256,16,8,0,0.17369065682093301
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,256,16,4,0,1.1303839683532715
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,256,16,1,0,2.9205493927001953
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,256,16,16,0,0.13206400473912558
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,256,16,8,0,0.7959840297698975
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,256,16,16,0,0.6422079801559448
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,256,16,32,0,0.16153599818547568
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,1,32,1,0,0.1648373305797577
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,256,16,32,0,0.5640106598536173
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,1,32,1,0,0.21986132860183716
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,1,32,2,0,0.16583466529846191
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,256,16,64,0,0.1516586641470591
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,256,16,64,0,0.49274667104085285
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,1,32,2,0,0.22449066241582236
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,256,16,128,0,1.0944426854451497
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,256,16,128,0,0.4344853162765503
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,1,32,4,0,0.1663040022055308
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,1,32,8,0,0.15652267138163248
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,1,32,4,0,0.1716853380203247
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,1,32,8,0,0.3062186638514201
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,1,32,16,0,0.15735466281572977
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,1,32,16,0,0.2172586719195048
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,1,32,32,0,0.1827146609624227
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,1,32,64,0,0.5148906707763672
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,1,32,64,0,0.17505067586898804
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,1,32,32,0,0.1593119998772939
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,1,32,128,0,0.1694773236910502
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,1,32,128,0,0.18678400913874307
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,2,32,1,0,0.17645333210627237
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,2,32,2,0,0.1518933375676473
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,2,32,1,0,0.6445866823196411
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,2,32,2,0,0.2229386568069458
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,2,32,4,0,0.1637386679649353
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,2,32,8,0,0.16617600123087564
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,2,32,8,0,0.21880000829696655
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,2,32,16,0,0.15454933047294617
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,2,32,4,0,0.22092799345652261
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,2,32,16,0,0.16839466492335
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,2,32,32,0,0.15806399782498678
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,2,32,64,0,0.15727466344833374
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,2,32,32,0,0.15812800327936807
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,2,32,64,0,0.22463999191919962
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,2,32,128,0,0.1594986617565155
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,2,32,128,0,0.9125066598256429
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,4,32,1,0,0.16134400169054666
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,4,32,1,0,0.13959466417630514
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,4,32,2,0,0.11578133702278137
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,4,32,4,0,0.2328746716181437
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,4,32,4,0,0.16219199697176614
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,4,32,8,0,0.2132373253504435
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,4,32,2,0,0.22817067305246988
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,4,32,8,0,0.15997866789499918
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,4,32,16,0,0.16176533699035645
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,4,32,32,0,0.2219466765721639
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,4,32,32,0,0.1694506605466207
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,4,32,64,0,0.42858131726582843
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,4,32,64,0,0.22367999951044717
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,4,32,16,0,0.4508266846338908
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,4,32,128,0,0.1562986671924591
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,4,32,128,0,0.1991999944051107
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,8,32,2,0,0.1699519952138265
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,8,32,1,0,0.16312000155448914
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,8,32,2,0,1.16649063428243
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,8,32,4,0,0.15556266903877258
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,8,32,4,0,0.22788800795873007
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,8,32,8,0,0.22019733985265097
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,8,32,8,0,0.1609760026137034
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,8,32,1,0,0.24463999271392822
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,8,32,16,0,0.1638826628526052
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,8,32,16,0,0.21222400665283203
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,8,32,32,0,0.22662933667500815
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,8,32,32,0,0.7588106791178385
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,8,32,128,0,0.15587733189264932
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,8,32,64,0,0.15611199537913004
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,8,32,64,0,0.22235733270645142
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,8,32,128,0,0.2002026637395223
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,16,32,1,0,0.1656053364276886
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,16,32,2,0,0.24187199274698892
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,16,32,2,0,0.14486933747927347
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,16,32,1,0,0.41204265753428143
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,16,32,4,0,0.13083733121554056
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,16,32,8,0,1.1161920229593914
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,16,32,4,0,0.2291733423868815
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,16,32,16,0,0.22020266453425089
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,16,32,8,0,1.1801599661509197
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,16,32,16,0,0.1627253293991089
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,16,32,32,0,0.155541330575943
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,16,32,32,0,0.18010665973027548
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,16,32,64,0,0.10151466727256775
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,16,32,64,0,0.22420267264048258
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,16,32,128,0,0.15632533033688864
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,16,32,128,0,0.2026240030924479
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,32,32,1,0,0.27848533789316815
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,32,32,4,0,0.15904532869656882
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,32,32,2,0,0.17574399709701538
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,32,32,4,0,0.28885867198308307
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,32,32,2,0,0.452458659807841
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,32,32,1,0,0.769050677617391
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,32,32,8,0,0.15652267138163248
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,32,32,8,0,0.23086400826772055
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,32,32,16,0,0.16330132881800333
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,32,32,32,0,0.48423465092976886
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,32,32,16,0,0.2342453400293986
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,32,32,32,0,0.17011733849843344
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,32,32,64,0,0.16125333309173584
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,32,32,64,0,0.18920532862345377
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,32,32,128,0,0.14703999956448874
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,32,32,128,0,0.34676265716552734
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,64,32,1,0,0.5197173357009888
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,64,32,4,0,0.17242133617401123
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,64,32,2,0,0.28563199440638226
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,64,32,1,0,1.4747254053751628
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,64,32,4,0,0.5622613430023193
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,64,32,2,0,0.862506628036499
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,64,32,8,0,0.1306933363278707
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,64,32,16,0,0.17209599415461221
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,64,32,8,0,0.38302401701609295
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,64,32,16,0,0.30642133951187134
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,64,32,32,0,0.16293866435686746
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,64,32,64,0,0.15377599994341531
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,64,32,32,0,0.2784586747487386
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,64,32,64,0,0.25668267409006756
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,64,32,128,0,0.23774933815002441
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,64,32,128,0,0.2307786742846171
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,128,32,2,0,0.5229599873224894
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,128,32,4,0,0.29148799180984497
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,128,32,8,0,0.1779200037320455
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,128,32,1,0,0.9916799863179525
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,128,32,2,0,1.6761600176493328
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,128,32,4,0,1.0656479994455974
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,128,32,1,0,2.8735148111979165
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,128,32,8,0,0.7516853014628092
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,128,32,16,0,0.1455519994099935
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,128,32,32,0,0.1530453364054362
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,128,32,16,0,0.5824000040690104
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,128,32,64,0,0.1630293329556783
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,128,32,32,0,0.5119200150171915
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,128,32,64,0,0.46435733636220294
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,128,32,128,0,0.16214932998021445
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,128,32,128,0,0.40644268194834393
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,256,32,1,0,1.9173760414123535
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,256,32,2,0,0.98853866259257
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,256,32,4,0,0.5310293436050415
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,256,32,2,0,3.3343785603841147
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,256,32,8,0,0.3013813296953837
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,256,32,8,0,1.4469547271728516
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,256,32,4,0,2.063370704650879
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,256,32,16,0,0.18428266048431396
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,256,32,1,0,5.69264539082845
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,256,32,32,0,0.164192001024882
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,256,32,16,0,1.1372426350911458
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,256,32,64,0,0.16473066806793213
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,256,32,32,0,0.9976320266723633
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,1,64,1,0,0.1536960005760193
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,256,32,64,0,0.908677339553833
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,1,64,2,0,0.09494933485984802
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,1,64,1,0,0.15844800074895224
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,256,32,128,0,0.16450666387875876
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,1,64,2,0,0.22250133752822876
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,1,64,4,0,0.12503467003504434
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,1,64,4,0,0.22392000754674277
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,256,32,128,0,0.7777439753214518
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,1,64,8,0,0.16116266449292502
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,1,64,32,0,0.15808000167210898
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,1,64,16,0,0.1653493344783783
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,1,64,8,0,0.22140266497929892
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,1,64,16,0,0.2162933349609375
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,1,64,64,0,0.16403733690579733
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,1,64,32,0,0.2158880035082499
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,1,64,64,0,0.22247467438379923
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,1,64,128,0,0.15827199816703796
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,1,64,128,0,0.2028800050417582
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,2,64,1,0,0.16159466902414957
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,2,64,2,0,0.2513706684112549
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,2,64,1,0,0.17397334178288779
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,2,64,4,0,0.15296000242233276
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,2,64,4,0,0.22462934255599976
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,2,64,2,0,0.2180639902750651
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,2,64,8,0,0.1604639987150828
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,2,64,8,0,0.22447999318440756
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,2,64,16,0,0.22682666778564453
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,2,64,16,0,0.15633599956830344
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,2,64,32,0,0.15411200126012167
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,2,64,64,0,0.1583093305428823
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,2,64,32,0,0.22276800870895386
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,2,64,64,0,1.4177333513895671
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,2,64,128,0,0.16022933522860208
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,2,64,128,0,0.14149866501490274
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,4,64,1,0,0.10262933373451233
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,4,64,2,0,0.22445333003997803
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,4,64,2,0,1.1708959738413494
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,4,64,4,0,0.1644266645113627
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,4,64,8,0,0.1556106706460317
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,4,64,1,0,1.171173334121704
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,4,64,4,0,0.22934933503468832
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,4,64,8,0,0.2209706703821818
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,4,64,16,0,0.1609386702378591
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,4,64,16,0,0.22246400515238443
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,4,64,32,0,0.16080533464749655
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,4,64,32,0,0.21893332401911417
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,4,64,64,0,0.5745600064595541
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,4,64,64,0,0.23028800884882608
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,4,64,128,0,0.21071465810139975
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,4,64,128,0,0.1786080002784729
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,8,64,2,0,0.16496533155441284
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,8,64,2,0,0.24581333001454672
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,8,64,1,0,0.315829336643219
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,8,64,1,0,0.4084800084431966
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,8,64,4,0,0.1846026579538981
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,8,64,8,0,0.15611732999483743
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,8,64,4,0,0.1572106679280599
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,8,64,8,0,0.21729065974553427
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,8,64,16,0,0.15532799561818442
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,8,64,16,0,0.22684800624847412
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,8,64,32,0,0.2223680019378662
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,8,64,32,0,0.15965867042541504
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,8,64,64,0,0.28142400582631427
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,8,64,64,0,0.3026346762975057
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,8,64,128,0,0.19704532623291016
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,8,64,128,0,0.16527466972668967
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,16,64,2,0,0.17445866266886392
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,16,64,1,0,0.28651734193166095
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,16,64,1,0,0.7712693214416504
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,16,64,2,0,0.4488159815470378
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,16,64,8,0,0.16243732968966165
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,16,64,4,0,0.1625920037428538
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,16,64,4,0,0.2733440001805623
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,16,64,32,0,0.642906665802002
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,16,64,8,0,0.24784533182779947
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,16,64,16,0,0.15129599968592325
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,16,64,64,0,0.1630400021870931
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,16,64,16,0,0.22452799479166666
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,16,64,32,0,0.19821866353352866
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,16,64,128,0,0.166485329469045
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,16,64,64,0,0.5567839940388998
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,16,64,128,0,0.20690133174260458
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,32,64,1,0,0.5221226612726847
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,32,64,8,0,0.16339199741681418
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,32,64,4,0,0.16668800512949625
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,32,64,2,0,1.1873706976572673
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,32,64,1,0,1.4711146354675293
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,32,64,2,0,0.8475893338521322
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,32,64,8,0,0.35704533259073895
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,32,64,4,0,0.5273013512293497
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,32,64,16,0,0.16116266449292502
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,32,64,16,0,0.27802133560180664
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,32,64,32,0,0.157231996456782
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,32,64,32,0,0.23811199267705283
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,32,64,64,0,0.16022933522860208
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,32,64,64,0,0.22981866200764975
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,32,64,128,0,0.15863466262817383
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,32,64,128,0,0.20259199539820352
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,64,64,2,0,0.518453319867452
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,64,64,4,0,0.2909440000851949
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,64,64,1,0,0.9882826805114746
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,64,64,1,0,2.8764588038126626
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,64,64,8,0,1.1225706736246746
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,64,64,2,0,1.6328214009602864
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,64,64,8,0,0.6841920216878256
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,64,64,16,0,0.1290773351987203
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,64,64,4,0,1.0045866966247559
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,64,64,16,0,0.5492480198542277
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,64,64,32,0,0.1602186659971873
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,64,64,64,0,0.16301866372426352
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,64,64,32,0,0.4533119996388753
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,64,64,64,0,0.39741333325703937
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,64,64,128,0,0.256661335627238
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,64,64,128,0,0.360485315322876
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,128,64,4,0,0.530128002166748
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,128,64,2,0,0.9908320109049479
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,128,64,1,0,1.9188799858093262
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,128,64,8,0,0.2997066577275594
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,128,64,2,0,3.221205393473307
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,128,64,4,0,1.941439946492513
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,128,64,8,0,1.3307360013326008
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,128,64,16,0,0.1746399998664856
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,128,64,1,0,5.660213470458984
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,128,64,16,0,1.0160746574401855
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,128,64,32,0,0.13564800222714743
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,128,64,64,0,0.15445866187413534
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,128,64,32,0,0.863146702448527
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,128,64,64,0,0.7866133054097494
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,128,64,128,0,0.162581334511439
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,128,64,128,0,0.7072959740956625
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,256,64,2,0,1.9155839284261067
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,256,64,4,0,1.0093973477681477
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,256,64,8,0,0.5515520175298055
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,256,64,1,0,3.732096036275228
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,256,64,4,0,3.8230721155802407
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,256,64,2,0,6.321615854899089
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,256,64,16,0,0.32204800844192505
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,256,64,32,0,0.20752533276875815
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,256,64,8,0,2.5780533154805503
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,256,64,16,0,1.9810986518859863
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,256,64,1,0,11.243258158365885
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,1,128,1,0,0.19734400510787964
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,256,64,32,0,1.670805295308431
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,1,128,2,0,0.1571946640809377
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,256,64,64,0,0.14452266693115234
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,1,128,1,0,0.24421334266662598
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,256,64,64,0,1.5330133438110352
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,256,64,128,0,0.15924266974131265
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,1,128,2,0,0.25733333826065063
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,1,128,4,0,0.15663466850916544
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,256,64,128,0,1.4357973734537761
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,1,128,4,0,0.1977333426475525
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,1,128,8,0,0.15923200050989786
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,1,128,8,0,0.20668800671895346
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,1,128,16,0,0.1476906637350718
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,1,128,32,0,0.1622666617234548
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,1,128,32,0,0.21665600935618082
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,1,128,16,0,0.22457067171732584
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,1,128,128,0,0.19230933984120688
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,1,128,64,0,0.15717333555221558
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,1,128,64,0,0.17322667439778647
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,1,128,128,0,0.6813120047251383
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,2,128,1,0,0.160863995552063
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,2,128,2,0,0.15377066532770792
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,2,128,1,0,0.24228266874949136
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,2,128,2,0,0.2198879917462667
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,2,128,4,0,0.15892799695332846
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,2,128,4,0,0.22446932395299277
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,2,128,32,0,0.23319466908772787
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,2,128,8,0,0.15452266732851663
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,2,128,8,0,0.2273120085398356
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,2,128,16,0,0.16223466396331787
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,2,128,32,0,0.15755732854207358
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,2,128,16,0,0.2170026699701945
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,2,128,64,0,0.1563146710395813
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,2,128,64,0,0.17053866386413574
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,2,128,128,0,0.19801066319147745
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,2,128,128,0,0.16174933314323425
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,4,128,2,0,0.16701332728068033
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,4,128,1,0,0.1739786664644877
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,4,128,2,0,0.23466666539510092
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,4,128,8,0,0.15874133507410684
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,4,128,4,0,0.8821439743041992
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,4,128,4,0,0.19820799430211386
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,4,128,8,0,0.2244266668955485
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,4,128,1,0,0.4308160146077474
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,4,128,16,0,0.16051733493804932
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,4,128,32,0,0.16716265678405762
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,4,128,16,0,0.22864532470703125
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,4,128,32,0,0.22498667240142822
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,4,128,64,0,0.15637866655985513
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,4,128,64,0,0.23298666874567667
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,4,128,128,0,0.21101333697636923
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,4,128,128,0,0.20500266551971436
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,8,128,2,0,0.1752799948056539
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,8,128,1,0,0.30009599526723224
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,8,128,1,0,0.7964586416880289
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,8,128,4,0,0.1644266645113627
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,8,128,4,0,1.1114826997121174
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,8,128,2,0,0.42582400639851886
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,8,128,8,0,0.22801599899927774
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,8,128,8,0,0.1625440021355947
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,8,128,16,0,0.2220159967740377
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,8,128,16,0,0.16459199786186218
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,8,128,32,0,0.15616533160209656
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,8,128,32,0,0.2304640014966329
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,8,128,64,0,0.1591093341509501
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,8,128,128,0,0.164192001024882
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,8,128,64,0,0.9646453062693278
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,8,128,128,0,0.9325546423594157
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,16,128,2,0,0.2930293281873067
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,16,128,1,0,0.5396586656570435
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,16,128,4,0,0.17203734318415323
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,16,128,2,0,0.7981332937876383
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,16,128,4,0,0.47887468338012695
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,16,128,8,0,0.1604586640993754
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,16,128,8,0,0.30454399188359577
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,16,128,1,0,1.5291412671407063
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,16,128,32,0,0.1567306617895762
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,16,128,16,0,0.1225226620833079
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,16,128,16,0,0.2337226668993632
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,16,128,32,0,0.2276960015296936
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,16,128,64,0,0.15376533071200052
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,16,128,64,0,0.23587733507156372
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,16,128,128,0,1.113711992899577
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,16,128,128,0,0.20313066244125366
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,32,128,2,0,0.5376693407694498
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,32,128,4,0,0.2967039942741394
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,32,128,1,0,3.449909210205078
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,32,128,4,0,0.9074133237202963
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,32,128,8,0,0.17513600985209146
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,32,128,1,0,2.992879867553711
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,32,128,2,0,1.5259040196736653
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,32,128,8,0,0.5836533308029175
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,32,128,16,0,0.16895467042922974
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,32,128,64,0,0.15653866529464722
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,32,128,32,0,0.16104533274968466
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,32,128,16,0,0.4193066755930583
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,32,128,32,0,0.3482666810353597
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,32,128,128,0,0.1657813290754954
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,32,128,64,0,0.3054506580034892
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,32,128,128,0,0.27189866701761883
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,64,128,2,0,1.0221386750539143
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,64,128,4,0,0.5403199990590414
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,64,128,8,0,0.30606400966644287
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,64,128,1,0,1.988053321838379
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,64,128,4,0,1.7457440694173176
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,64,128,2,0,2.9760265350341797
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,64,128,16,0,0.18406933546066284
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,64,128,8,0,1.1208213170369465
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,64,128,32,0,0.12355732917785645
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,64,128,16,0,0.8095359802246094
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,64,128,32,0,0.6418399810791016
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,64,128,1,0,5.873754501342773
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,64,128,64,0,0.5767039855321249
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,64,128,128,0,0.1593653361002604
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,64,128,64,0,0.1611840029557546
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,64,128,128,0,0.5051306486129761
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,128,128,8,0,0.5631626844406128
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,128,128,2,0,1.996005376180013
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,128,128,4,0,1.0344746907552083
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,128,128,1,0,3.9068800608317056
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,128,128,4,0,3.4262240727742515
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,128,128,2,0,5.8701121012369795
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,128,128,16,0,0.3287840088208516
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,128,128,8,0,2.1749067306518555
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,128,128,16,0,1.5691893895467122
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,128,128,32,0,0.21017066637674967
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,128,128,32,0,1.2770559787750244
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,128,128,64,0,0.16037333011627197
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,128,128,1,0,11.667940775553385
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,128,128,128,0,0.13102933764457703
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,128,128,64,0,1.1156586805979412
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,128,128,128,0,0.9949706395467123
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,256,128,2,0,3.934687932332357
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,256,128,4,0,2.0343039830525718
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,256,128,8,0,1.0816160043080647
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,256,128,1,0,7.757770538330078
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,256,128,8,0,4.297072092692058
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,256,128,4,0,6.7881120045979815
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,256,128,32,0,0.3781813383102417
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,256,128,16,0,3.0874878565470376
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,256,128,2,0,11.668421427408854
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,256,128,64,0,0.2605653405189514
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,256,128,16,0,0.6139520009358724
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,256,128,32,0,2.4814027150472007
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,1,256,1,0,0.10847466190656026
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,256,128,128,0,0.18634666999181113
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,256,128,64,0,2.173136075337728
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,1,256,2,0,0.2783626715342204
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,1,256,1,0,0.16142933567365012
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,1,256,2,0,0.1281013290087382
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,1,256,4,0,0.7166453202565511
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,1,256,8,0,0.15656532843907675
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,1,256,4,0,0.1729546586672465
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,1,256,8,0,0.1646986703077952
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,1,256,16,0,0.13867732882499695
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,256,128,128,0,1.973903973897298
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,1,256,16,0,0.16044800480206808
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,1,256,32,0,0.15967466433842978
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,1,256,64,0,0.16039466857910156
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,256,128,1,0,21.820393880208332
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,1,256,64,0,1.1045599778493245
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,1,256,128,0,0.16110933820406595
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,1,256,32,0,0.16220266620318094
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,1,256,128,0,0.16022400061289468
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,2,256,1,0,0.1834026575088501
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,2,256,2,0,0.1646773318449656
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,2,256,1,0,0.1792800029118856
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,2,256,4,0,0.13235200444857279
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,2,256,2,0,0.14309866229693094
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,2,256,4,0,0.14018666744232178
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,2,256,8,0,0.1360266705354055
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,2,256,8,0,0.12052266796429952
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,2,256,16,0,0.12712533275286356
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,2,256,32,0,0.16767466068267822
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,2,256,16,0,0.17324266831080118
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,2,256,32,0,0.15889599919319153
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,2,256,64,0,0.1572160025437673
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,2,256,64,0,0.14566399653752646
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,2,256,128,0,0.11087999741236369
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,2,256,128,0,0.13395200173060098
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,4,256,1,0,0.3240320086479187
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,4,256,2,0,0.18078400691350302
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,4,256,1,0,0.24579733610153198
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,4,256,4,0,0.15991999705632529
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,4,256,2,0,0.16140799721082053
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,4,256,4,0,0.16134400169054666
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,4,256,8,0,0.1625866691271464
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,4,256,8,0,0.16426133116086325
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,4,256,16,0,0.16189866264661154
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,4,256,32,0,0.15896000464757284
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,4,256,16,0,0.16005333264668783
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,4,256,32,0,0.16643733779589334
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,4,256,64,0,0.1581439971923828
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,4,256,64,0,0.12730133533477783
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,4,256,128,0,0.22473067045211792
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,4,256,128,0,0.16235733032226562
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,8,256,2,0,0.31461334228515625
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,8,256,2,0,0.24876266717910767
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,8,256,1,0,0.496234655380249
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,8,256,1,0,0.5760906537373861
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,8,256,4,0,0.1824480096499125
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,8,256,8,0,0.16405866543451944
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,8,256,4,0,0.15292267004648843
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,8,256,8,0,0.15946132938067117
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,8,256,16,0,0.15940266847610474
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,8,256,16,0,0.8463626702626547
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,8,256,32,0,0.16402666767438254
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,8,256,32,0,0.12307733297348022
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,8,256,64,0,0.15570132931073508
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,8,256,64,0,0.11105599999427795
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,8,256,128,0,0.15968533356984457
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,8,256,128,0,0.15829867124557495
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,16,256,1,0,0.8389173348744711
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,16,256,2,0,0.5772480169932047
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,16,256,2,0,0.4586293299992879
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,16,256,1,0,1.105738639831543
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,16,256,4,0,0.3211146593093872
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,16,256,4,0,0.2544106642405192
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,16,256,8,0,0.18494399388631186
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,16,256,8,0,0.16180266936620077
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,16,256,16,0,0.16742400328318277
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,16,256,16,0,0.1307093302408854
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,16,256,32,0,0.16404267152150473
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,16,256,32,0,0.177946666876475
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,16,256,64,0,0.1637279987335205
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,16,256,128,0,0.15737066666285196
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,16,256,64,0,0.1646773318449656
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,16,256,128,0,0.15582933028539023
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,32,256,2,0,1.1093493302663167
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,32,256,4,0,0.5868533452351888
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,32,256,2,0,0.8433120250701904
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,32,256,8,0,0.26377065976460773
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,32,256,1,0,1.6251200040181477
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,32,256,1,0,2.150383949279785
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,32,256,16,0,0.19409600893656412
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,32,256,8,0,0.32601600885391235
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,32,256,16,0,0.16731733083724976
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,32,256,4,0,0.45732800165812176
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,32,256,32,0,0.16196800271670023
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,32,256,64,0,0.15639999508857727
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,32,256,32,0,0.17008533080418906
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,32,256,64,0,0.1560479998588562
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,32,256,128,0,0.12958932916323343
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,32,256,128,0,0.1611146628856659
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,64,256,2,0,1.6397652626037598
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,64,256,4,0,1.1369493007659912
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,64,256,4,0,0.8737013339996338
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,64,256,8,0,0.4821759859720866
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,64,256,1,0,4.247626622517903
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,64,256,2,0,2.961306571960449
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,64,256,8,0,0.6097226540247599
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,64,256,1,0,3.2054665883382163
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,64,256,16,0,0.34985601902008057
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,64,256,32,0,0.22014933824539185
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,64,256,16,0,0.2870453397432963
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,64,256,64,0,0.16770132382710776
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,64,256,32,0,0.1874026656150818
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,64,256,64,0,0.1545919974644979
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,64,256,128,0,0.16057599584261575
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,64,256,128,0,0.1209440032641093
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,128,256,8,0,1.1791573365529378
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,128,256,2,0,3.210293451944987
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,128,256,4,0,2.223519961039225
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,128,256,4,0,1.6817973454793294
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,128,256,8,0,0.9195466836293539
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,128,256,2,0,4.28115717569987
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,128,256,16,0,0.6618293523788452
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,128,256,1,0,6.306549072265625
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,128,256,16,0,0.5278986692428589
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,128,256,32,0,0.33860798676808673
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,128,256,32,0,0.40302932262420654
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,128,256,1,0,8.417434692382812
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,128,256,64,0,0.27184534072875977
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,1,512,1,0,0.16612799962361655
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,128,256,64,0,0.2392853299776713
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,1,512,1,0,0.20892266432444254
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,128,256,128,0,0.1952000061670939
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,1,512,2,0,0.16479999820391336
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,1,512,4,0,0.15722666184107462
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,1,512,2,0,0.1570080022017161
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,1,512,4,0,0.1687893271446228
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,128,256,128,0,0.1851573387781779
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,1,512,8,0,0.15785599748293558
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,1,512,8,0,0.16773333152135214
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,1,512,16,0,0.15868799885114035
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,1,512,32,0,0.15391467014948526
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,1,512,16,0,0.16312000155448914
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,1,512,32,0,1.2593066692352295
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,1,512,64,0,0.1588053305943807
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,1,512,128,0,0.16522666811943054
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,1,512,64,0,0.1662346621354421
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,1,512,128,0,0.1605226695537567
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,2,512,2,0,0.20813333988189697
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,2,512,1,0,0.254533330599467
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,2,512,2,0,0.17626667022705078
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,2,512,1,0,0.3675413529078166
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,2,512,4,0,0.14009599884351095
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,2,512,8,0,0.8908426761627197
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,2,512,4,0,0.1566986640294393
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,2,512,8,0,0.15878933668136597
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,2,512,16,0,0.20468266805013022
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,2,512,16,0,0.15876266360282898
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,2,512,32,0,0.15548800428708395
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,2,512,32,0,0.23868266741434732
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,2,512,64,0,0.15916267037391663
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,2,512,64,0,0.1806559960047404
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,2,512,128,0,1.1017599900563557
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,2,512,128,0,0.16455466548601785
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,4,512,2,0,0.36560531457265216
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,4,512,1,0,0.4694026708602905
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,4,512,1,0,0.6743413607279459
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,4,512,4,0,0.2070080041885376
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,4,512,2,0,0.2577066620190938
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,4,512,4,0,0.1686506668726603
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,4,512,8,0,0.16165866454442343
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,4,512,8,0,0.1704053282737732
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,4,512,16,0,0.12531200051307678
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,4,512,16,0,0.15035200119018555
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,4,512,32,0,0.15996266404787698
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,4,512,32,0,0.15557333827018738
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,4,512,64,0,0.7820959885915121
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,4,512,64,0,0.16061866283416748
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,4,512,128,0,0.15890666842460632
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,4,512,128,0,0.16164799531300864
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,8,512,4,0,0.26969067255655926
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,8,512,2,0,0.6765333016713461
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,8,512,2,0,0.47020800908406574
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,8,512,1,0,1.2931520144144695
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,8,512,8,0,0.21069333950678507
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,8,512,4,0,0.3642079830169678
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,8,512,8,0,0.1665600041548411
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,8,512,1,0,0.8942346572875977
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,8,512,32,0,0.15690666437149048
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,8,512,16,0,0.17075733343760172
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,8,512,16,0,0.1560479998588562
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,8,512,32,0,0.16665599743525186
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,8,512,64,0,0.16911466916402182
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,8,512,64,0,0.12639466921488443
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,8,512,128,0,0.15553067127863565
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,8,512,128,0,0.16056533654530844
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,16,512,1,0,2.5309279759724936
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,16,512,2,0,0.898693323135376
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,16,512,2,0,1.2918132940928142
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,16,512,1,0,1.7627466519673665
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,16,512,4,0,0.6744799613952637
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,16,512,8,0,0.3725653489430745
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,16,512,4,0,0.4818506638209025
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,16,512,16,0,0.24927467107772827
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,16,512,8,0,0.2783413330713908
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,16,512,16,0,0.1800373395284017
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,16,512,32,0,0.17293334007263184
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,16,512,64,0,0.16030399998029074
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,16,512,64,0,0.12237333257993062
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,16,512,128,0,0.15595733126004538
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,16,512,32,0,0.15583466490109762
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,16,512,128,0,0.12971199552218118
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,32,512,4,0,1.2977386315663655
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,32,512,2,0,1.7422134081522624
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,32,512,8,0,0.6999413172403971
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,32,512,4,0,0.9203999837239584
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,32,512,2,0,2.4941333134969077
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,32,512,8,0,0.5077973206837972
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,32,512,16,0,0.4464159806569417
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,32,512,1,0,3.39790407816569
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,32,512,32,0,0.27035200595855713
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,32,512,16,0,0.316048006216685
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,32,512,32,0,0.1972800095876058
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,32,512,1,0,5.008981386820476
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,32,512,128,0,0.15647466977437338
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,32,512,64,0,0.1827626625696818
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,32,512,64,0,0.14173332850138345
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,32,512,128,0,0.12480533123016357
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,64,512,4,0,1.7783625920613606
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,64,512,2,0,3.4133227666219077
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,64,512,8,0,1.3539199829101562
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,64,512,2,0,4.980719884236653
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,64,512,4,0,2.545039971669515
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,64,512,1,0,6.678576151529948
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,64,512,8,0,0.9636373519897461
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,64,512,16,0,0.8484586874643961
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,64,512,16,0,0.561621348063151
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,64,512,32,0,0.3531786600748698
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,64,512,64,0,0.3242453336715698
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,64,512,32,0,0.49851731459299725
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,64,512,1,0,9.913397471110025
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,64,512,64,0,0.24679466088612875
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,64,512,128,0,0.23731732368469238
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,64,512,128,0,0.1925920049349467
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,1,1024,1,0,0.28302399317423504
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,1,1024,1,0,0.28383467594782513
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,1,1024,4,0,0.15409599741299948
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,1,1024,4,0,0.15713066856066385
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,1,1024,2,0,0.1676959991455078
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,1,1024,2,0,0.1695093313852946
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,1,1024,16,0,0.15804266929626465
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,1,1024,8,0,1.1017333666483562
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,1,1024,16,0,0.27290666103363037
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,1,1024,32,0,0.17169066270192465
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,1,1024,8,0,0.15632533033688864
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,1,1024,32,0,0.14114133516947427
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,1,1024,64,0,0.1569866637388865
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,1,1024,64,0,0.16884799798329672
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,1,1024,128,0,0.16854933897654215
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,1,1024,128,0,0.18048532803853354
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,2,1024,1,0,0.5167573293050131
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,2,1024,2,0,0.3019733428955078
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,2,1024,2,0,0.28970666726430255
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,2,1024,1,0,0.5153173208236694
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,2,1024,4,0,0.1804800033569336
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,2,1024,4,0,0.16961065928141275
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,2,1024,8,0,0.164383997519811
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,2,1024,8,0,0.14140799641609192
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,2,1024,16,0,0.1644213298956553
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,2,1024,16,0,0.16195199886957803
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,2,1024,32,0,0.170415997505188
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,2,1024,32,0,0.16789867480595908
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,2,1024,64,0,0.1646613379319509
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,2,1024,128,0,0.1700906753540039
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,2,1024,128,0,0.17982399463653564
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,2,1024,64,0,0.18073066075642905
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,4,1024,2,0,0.5230933427810669
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,4,1024,2,0,0.5299946864446005
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,4,1024,1,0,1.0113706588745117
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,4,1024,4,0,0.2969546715418498
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,4,1024,1,0,0.9874239762624105
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,4,1024,4,0,0.2923520008722941
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,4,1024,8,0,0.17725332578023276
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,4,1024,16,0,0.17037866512934366
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,4,1024,8,0,0.182751993338267
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,4,1024,32,0,0.15939733386039734
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,4,1024,16,0,0.17483200629552206
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,4,1024,64,0,0.16460800170898438
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,4,1024,128,0,0.19333332777023315
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,4,1024,32,0,0.17656532923380533
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,4,1024,128,0,0.16582933068275452
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,4,1024,64,0,0.29076266288757324
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,8,1024,4,0,0.5374399820963541
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,8,1024,1,0,1.933066685994466
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,8,1024,4,0,0.5280373493830363
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,8,1024,2,0,0.990880012512207
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,8,1024,8,0,0.3025333285331726
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,8,1024,2,0,0.9887200196584066
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,8,1024,16,0,0.20003734032313028
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,8,1024,8,0,0.3012373248736064
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,8,1024,1,0,1.908938725789388
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,8,1024,32,0,0.16656532883644104
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,8,1024,16,0,0.2013439933458964
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,8,1024,32,0,0.1686506668726603
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,8,1024,64,0,0.16274666786193848
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,8,1024,64,0,0.17140799760818481
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,8,1024,128,0,0.16757865746816
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,8,1024,128,0,0.16382400194803873
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,16,1024,2,0,1.9229920705159504
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,16,1024,4,0,1.013760010401408
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,16,1024,2,0,1.9260692596435547
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,16,1024,8,0,0.5593706766764323
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,16,1024,4,0,1.016208012898763
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,16,1024,1,0,3.7644214630126953
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,16,1024,8,0,0.5561493237813314
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,16,1024,16,0,0.35145068168640137
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,16,1024,1,0,3.7434027989705405
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,16,1024,16,0,0.35053332646687824
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,16,1024,32,0,0.22505066792170206
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,16,1024,64,0,0.17374932765960693
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,16,1024,32,0,0.22618667284647623
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,16,1024,64,0,0.1773279905319214
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,16,1024,128,0,0.16952532529830933
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,16,1024,128,0,0.12944533427556357
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,32,1024,4,0,1.9731839497884114
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,32,1024,8,0,1.0623573462168376
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,32,1024,4,0,1.9768479665120442
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,32,1024,8,0,1.054693301518758
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,32,1024,2,0,3.7844320933024087
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,32,1024,2,0,3.7850828170776367
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,32,1024,16,0,0.6509226560592651
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,32,1024,16,0,0.6548320055007935
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,32,1024,1,0,7.384576161702474
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,32,1024,32,0,0.4026399850845337
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,32,1024,32,0,0.4049866596857707
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,32,1024,1,0,7.414213180541992
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,32,1024,64,0,0.2741760015487671
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,32,1024,64,0,0.27693865696589154
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,32,1024,128,0,0.20935465892155966
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,32,1024,128,0,0.20910932620366415
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,1,1536,1,0,0.4383466641108195
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,1,1536,2,0,0.24644267559051514
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,1,1536,1,0,0.43953601519266766
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,1,1536,2,0,0.26071999470392865
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,1,1536,8,0,0.16391467054684958
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,1,1536,4,0,0.16890132427215576
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,1,1536,8,0,0.1593173344930013
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,1,1536,32,0,0.13716800014177957
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,1,1536,4,0,0.17071467638015747
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,1,1536,16,0,0.16844266653060913
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,1,1536,16,0,0.17514665921529135
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,1,1536,32,0,0.16832532485326132
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,1,1536,64,0,0.16570666432380676
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,1,1536,64,0,0.17117865880330405
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,1,1536,128,0,0.1641493340333303
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,1,1536,128,0,0.16721065839131674
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,2,1536,2,0,0.44412267208099365
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,2,1536,4,0,0.25180800755818683
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,2,1536,2,0,0.4442186752955119
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,2,1536,1,0,0.8216479619344076
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,2,1536,4,0,0.25588266054789227
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,2,1536,8,0,0.16315199931462607
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,2,1536,1,0,0.8243253231048584
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,2,1536,8,0,0.17766932646433511
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,2,1536,16,0,0.7827040354410807
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,2,1536,32,0,0.17345066865285239
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,2,1536,16,0,0.1726026733716329
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,2,1536,32,0,0.17475199699401855
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,2,1536,64,0,0.1649493376413981
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,2,1536,64,0,0.17244267463684082
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,2,1536,128,0,0.17668799559275308
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,2,1536,128,0,0.16785067319869995
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,4,1536,2,0,0.8217120170593262
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,4,1536,1,0,1.5816960334777832
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,4,1536,1,0,1.577445348103841
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,4,1536,2,0,0.8305599689483643
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,4,1536,4,0,0.47217599550882977
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,4,1536,4,0,0.4467039903004964
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,4,1536,8,0,0.2613706588745117
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,4,1536,8,0,0.2643946607907613
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,4,1536,16,0,0.18088533480962118
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,4,1536,16,0,0.1788533329963684
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,4,1536,32,0,0.1670666734377543
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,4,1536,32,0,0.17139732837677002
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,4,1536,64,0,0.1830293337504069
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,4,1536,64,0,0.22458134094874063
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,4,1536,128,0,0.17397334178288779
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,4,1536,128,0,0.17429866393407187
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,8,1536,2,0,1.5924053192138672
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,8,1536,4,0,0.8443413575490316
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,8,1536,4,0,0.8406293392181396
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,8,1536,1,0,3.080922762552897
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,8,1536,1,0,3.082085291544596
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,8,1536,2,0,1.5864906311035156
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,8,1536,8,0,0.4687466621398926
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,8,1536,8,0,0.4665333429972331
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,8,1536,16,0,0.29580267270406085
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,8,1536,16,0,0.2974666754404704
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,8,1536,32,0,0.1878933310508728
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,8,1536,64,0,0.1546933352947235
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,8,1536,128,0,0.1699733336766561
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,8,1536,64,0,0.16978132724761963
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,8,1536,32,0,0.19205333789189658
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,8,1536,128,0,0.16590399543444315
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,16,1536,2,0,3.156325340270996
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,16,1536,4,0,1.6600106557210286
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,16,1536,4,0,1.6395306587219238
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,16,1536,8,0,0.8869120279947916
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,16,1536,2,0,3.123413403828939
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,16,1536,8,0,0.879962682723999
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,16,1536,16,0,0.5494879881540934
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,16,1536,32,0,0.33350932598114014
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,16,1536,1,0,6.133839925130208
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,16,1536,16,0,0.5452640056610107
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,16,1536,1,0,6.126533508300781
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,16,1536,32,0,0.3374240001042684
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,16,1536,64,0,0.22593599557876587
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,16,1536,128,0,0.1779680053393046
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,16,1536,64,0,0.22992533445358276
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,1,2048,1,0,0.6132693290710449
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,1,2048,1,0,0.6190079847971598
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,1,2048,2,0,0.3405333360036214
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,16,1536,128,0,0.1763520042101542
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,1,2048,2,0,0.3397066593170166
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,1,2048,4,0,0.20337067047754923
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,1,2048,8,0,0.15548266967137656
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,1,2048,16,0,0.16613333423932394
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,1,2048,8,0,0.17283199230829874
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,1,2048,4,0,0.20270933707555136
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,1,2048,32,0,0.15846932927767435
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,1,2048,16,0,0.13945600390434265
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,1,2048,64,0,0.2201813260714213
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,1,2048,32,0,0.17004267374674478
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,1,2048,64,0,0.17522132396697998
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,1,2048,128,0,0.1726400057474772
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,1,2048,128,0,0.1747573415438334
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,2,2048,2,0,0.6161919832229614
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,2,2048,1,0,1.1809333165486653
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,2,2048,2,0,0.6218239863713583
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,2,2048,4,0,0.37115732828776044
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,2,2048,4,0,0.4896106719970703
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,2,2048,1,0,1.167365312576294
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,2,2048,8,0,0.20433600743611655
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,2,2048,8,0,0.2029973268508911
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,2,2048,16,0,0.31876800457636517
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,2,2048,32,0,0.16768532991409302
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,2,2048,16,0,0.16879467169443765
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,2,2048,64,0,0.16475733121236166
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,2,2048,32,0,0.17917867501576742
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,2,2048,128,0,0.26236265897750854
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,2,2048,64,0,0.16224533319473267
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,2,2048,128,0,0.176581343015035
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,4,2048,4,0,0.645087997118632
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,4,2048,8,0,0.36530665556589764
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,4,2048,4,0,0.6432533264160156
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,4,2048,2,0,1.1781280040740967
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,4,2048,2,0,1.1907306512196858
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,4,2048,1,0,2.273615996042887
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,4,2048,8,0,0.35397334893544513
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,4,2048,1,0,2.27018133799235
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,4,2048,16,0,0.22266133626302084
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,4,2048,16,0,0.21707199017206827
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,4,2048,32,0,0.17152533928553262
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,4,2048,32,0,0.1767359972000122
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,4,2048,64,0,0.17173333962758383
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,4,2048,128,0,0.16412267088890076
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,4,2048,64,0,0.1816426714261373
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,4,2048,128,0,0.17002665996551514
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,8,2048,8,0,0.6550346612930298
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,8,2048,4,0,1.2067039807637532
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,8,2048,2,0,2.316330591837565
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,8,2048,4,0,1.1962773005167644
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,8,2048,2,0,2.2899252573649087
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,8,2048,1,0,4.508469263712565
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,8,2048,8,0,0.6566773255666097
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,8,2048,16,0,0.3774506648381551
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,8,2048,1,0,4.488997459411621
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,8,2048,16,0,0.3827466567357381
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,8,2048,32,0,0.2539733250935872
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,8,2048,64,0,0.18125865856806436
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,8,2048,32,0,0.2560746669769287
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,8,2048,128,0,0.16475199659665427
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,8,2048,128,0,0.1675893266995748
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,8,2048,64,0,0.18236267566680908
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,16,2048,8,0,1.244383970896403
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,16,2048,4,0,2.343301296234131
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,16,2048,8,0,1.2485333283742268
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,16,2048,4,0,2.341365337371826
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,16,2048,16,0,0.7009759744008383
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,16,2048,2,0,4.520144144694011
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,16,2048,2,0,4.517797470092773
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,16,2048,16,0,0.7048373222351074
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,16,2048,32,0,0.4567199945449829
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,16,2048,32,0,0.45979201793670654
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,16,2048,64,0,0.30582932631174725
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,16,2048,64,0,0.30922667185465497
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,16,2048,128,0,0.2273226579030355
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,16,2048,1,0,8.81057612101237
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,16,2048,1,0,8.814303716023764
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,16,2048,128,0,0.2302293380101522
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,1,3072,2,0,0.8247893651326498
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,1,3072,2,0,0.5549173355102539
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,1,3072,1,0,1.0360746383666992
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,1,3072,4,0,0.31806399424870807
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,1,3072,4,0,0.3157866597175598
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,1,3072,1,0,1.0355199972788494
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,1,3072,8,0,0.1944426695505778
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,1,3072,8,0,0.19261866807937622
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,1,3072,16,0,0.15875200430552164
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,1,3072,16,0,0.15933866302172342
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,1,3072,32,0,0.16596266627311707
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,1,3072,32,0,0.1768266757329305
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,1,3072,64,0,0.16920000314712524
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,1,3072,64,0,0.1759893298149109
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,1,3072,128,0,0.14825600385665894
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,1,3072,128,0,0.16925332943598428
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,2,3072,2,0,1.0390079816182454
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,2,3072,1,0,1.9887839953104656
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,2,3072,4,0,0.5617013374964396
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,2,3072,2,0,1.0456266403198242
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,2,3072,1,0,1.9837226867675781
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,2,3072,4,0,0.5701226790746053
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,2,3072,8,0,0.31885866324106854
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,2,3072,8,0,0.32681065797805786
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,2,3072,16,0,0.1979573369026184
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,2,3072,16,0,0.22951465845108032
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,2,3072,32,0,0.14816000064214072
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,2,3072,32,0,0.15061333775520325
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,2,3072,64,0,0.1718399922053019
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,2,3072,64,0,0.16586132844289145
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,2,3072,128,0,0.16154133280118307
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,2,3072,128,0,0.8735253016153971
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,4,3072,4,0,1.0537493228912354
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,4,3072,2,0,2.019162654876709
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,4,3072,8,0,0.5806826750437418
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,4,3072,2,0,2.0352479616800943
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,4,3072,4,0,1.0519786675771077
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,4,3072,8,0,0.5784586668014526
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,4,3072,16,0,0.33881068229675293
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,4,3072,1,0,3.9314772288004556
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,4,3072,1,0,3.9055840174357095
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,4,3072,16,0,0.9526613553365072
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,4,3072,32,0,0.22622400522232056
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,4,3072,32,0,0.6259626547495524
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,4,3072,64,0,0.17291200160980225
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,4,3072,64,0,0.18077866236368814
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,4,3072,128,0,0.17919466892878214
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,4,3072,128,0,0.17038400967915854
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,8,3072,8,0,1.0962080160776775
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,8,3072,4,0,2.0402347246805825
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,8,3072,4,0,2.044271945953369
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,8,3072,2,0,3.9285332361857095
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,8,3072,2,0,3.9270881017049155
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,8,3072,8,0,1.098357359568278
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,8,3072,16,0,0.6151839892069498
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,8,3072,32,0,0.3982880115509033
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,8,3072,1,0,7.754549026489258
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,8,3072,32,0,0.40064001083374023
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,8,3072,16,0,0.6420693397521973
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,8,3072,1,0,7.728330612182617
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,8,3072,64,0,0.2656266689300537
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,8,3072,64,0,0.2683200041453044
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,8,3072,128,0,0.19612799088160196
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,8,3072,128,0,0.2023626764615377
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,1,4096,1,0,1.5380373001098633
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,1,4096,1,0,1.5479092597961426
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,1,4096,8,0,0.2672053376833598
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,1,4096,2,0,1.6536107063293457
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,1,4096,4,0,0.4530613422393799
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,1,4096,4,0,0.4524213473002116
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,1,4096,2,0,0.8209386666615804
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,1,4096,8,0,0.26659200588862103
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,1,4096,16,0,0.2823573350906372
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,1,4096,16,0,0.1755946675936381
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,1,4096,32,0,0.16155733664830527
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,1,4096,128,0,0.1606933375199636
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,1,4096,32,0,0.15983466307322183
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,1,4096,64,0,0.17003732919692993
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,1,4096,128,0,0.19147199392318726
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,1,4096,64,0,0.16857600212097168
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,2,4096,4,0,0.8194613456726074
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,2,4096,2,0,1.5506772994995117
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,2,4096,4,0,0.8249599933624268
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,2,4096,8,0,0.46584534645080566
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,2,4096,2,0,1.5599625905354817
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,2,4096,16,0,0.275221327940623
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,2,4096,1,0,2.9987786610921225
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,2,4096,8,0,0.4786986509958903
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,2,4096,1,0,3.0093441009521484
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,2,4096,16,0,0.27748266855875653
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,2,4096,64,0,0.1684053341547648
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,2,4096,32,0,0.18472532431284586
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,2,4096,64,0,0.16856000820795694
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,2,4096,32,0,0.18633600076039633
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,2,4096,128,0,1.5152319272359211
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,2,4096,128,0,0.16576533516248068
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,4,4096,4,0,1.5699040095011394
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,4,4096,8,0,0.8553386529286703
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,4,4096,8,0,0.8518986701965332
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,4,4096,4,0,1.5812586148579915
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,4,4096,2,0,3.034735997517904
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,4,4096,2,0,3.0459038416544595
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,4,4096,16,0,0.48190399010976154
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,4,4096,16,0,0.48556800683339435
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,4,4096,1,0,5.942410786946614
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,4,4096,32,0,0.30112000306447345
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,4,4096,1,0,5.914746602376302
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,4,4096,32,0,0.30167466402053833
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,4,4096,64,0,0.21740800142288208
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,4,4096,128,0,0.1657866636912028
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,4,4096,64,0,0.21779733896255493
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,4,4096,128,0,0.16547200083732605
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,8,4096,8,0,1.6225813229878743
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,8,4096,4,0,3.0607306162516275
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,8,4096,8,0,1.6291786829630535
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,8,4096,4,0,3.082768122355143
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,8,4096,2,0,5.980031967163086
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,8,4096,16,0,0.8996373017628988
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,8,4096,16,0,0.8950080076853434
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,8,4096,2,0,5.94261360168457
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,8,4096,64,0,0.3696586688359578
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,8,4096,32,0,0.5363680124282837
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,8,4096,32,0,0.55458664894104
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,8,4096,64,0,0.372538685798645
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,8,4096,128,0,0.2653706669807434
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,8,4096,128,0,0.2675039966901143
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,8,4096,1,0,11.758992513020834
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,8,4096,1,0,11.7217648824056
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,1,6144,1,0,2.821728070576986
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,1,6144,2,0,1.4933759371439617
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,1,6144,4,0,0.7831520239512125
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,1,6144,2,0,1.4841119448343914
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,1,6144,1,0,2.839402516682943
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,1,6144,8,0,0.4507519801457723
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,1,6144,4,0,0.7815519968668619
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,1,6144,16,0,0.27376532554626465
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,1,6144,8,0,0.44741864999135333
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,1,6144,32,0,0.1853920022646586
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,1,6144,16,0,0.2722933292388916
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,1,6144,32,0,0.19011733929316202
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,1,6144,64,0,0.1734559933344523
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,1,6144,64,0,0.18260266383488974
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,1,6144,128,0,0.16898133357365927
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,1,6144,128,0,0.21308799584706625
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,2,6144,2,0,2.849130630493164
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,2,6144,8,0,0.83133864402771
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,2,6144,4,0,1.4782026608784993
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,2,6144,4,0,1.4882346789042156
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,2,6144,8,0,0.8090133666992188
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,2,6144,16,0,0.46194668610890705
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,2,6144,1,0,5.576592127482097
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,2,6144,2,0,2.8357601165771484
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,2,6144,1,0,5.569183985392253
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,2,6144,16,0,0.4637226661046346
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,2,6144,32,0,0.2890719970067342
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,2,6144,32,0,0.2911840081214905
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,2,6144,64,0,0.2144213318824768
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,2,6144,64,0,0.21880000829696655
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,2,6144,128,0,0.18291199207305908
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,2,6144,128,0,0.1518933375676473
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,4,6144,8,0,1.530128002166748
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,4,6144,4,0,2.8765281041463218
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,4,6144,4,0,2.876842816670736
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,4,6144,2,0,5.5937760670979815
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,4,6144,16,0,0.8608960310618082
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,4,6144,2,0,5.586463928222656
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,4,6144,16,0,0.844709316889445
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,4,6144,32,0,0.4997333288192749
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,4,6144,8,0,1.5204159418741863
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,4,6144,32,0,0.5000053246816
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,4,6144,64,0,0.3391733169555664
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,4,6144,64,0,0.34169065952301025
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,4,6144,1,0,10.941429138183594
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,4,6144,128,0,1.1408320267995198
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,4,6144,128,0,0.248906672000885
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,4,6144,1,0,11.078357696533203
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,1,8192,2,0,2.300666650136312
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,1,8192,4,0,1.2204853693644206
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,1,8192,8,0,0.6721493403116862
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,1,8192,2,0,2.3096906344095864
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,1,8192,4,0,1.216117302576701
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,1,8192,8,0,0.7060746351877848
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,1,8192,32,0,0.26418666044871014
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,1,8192,16,0,0.3952639897664388
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,1,8192,1,0,4.50984541575114
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,1,8192,32,0,0.25840532779693604
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,1,8192,1,0,4.466821352640788
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,1,8192,16,0,0.39900799592336017
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,1,8192,64,0,0.174127995967865
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,1,8192,128,0,0.1723840037981669
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,1,8192,64,0,0.18301333983739218
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,1,8192,128,0,0.16933866341908774
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,2,8192,4,0,2.3237387339274087
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,2,8192,8,0,1.247050682703654
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,2,8192,4,0,2.3286186854044595
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,2,8192,8,0,1.2330559889475505
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,2,8192,2,0,4.491658528645833
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,2,8192,16,0,0.6914827028910319
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,2,8192,2,0,4.529253323872884
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,2,8192,16,0,0.7030080159505209
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,2,8192,32,0,0.4190133412679036
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,2,8192,32,0,0.4193919897079468
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,2,8192,64,0,0.2818560004234314
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,2,8192,128,0,0.20749332507451376
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,2,8192,64,0,0.28299200534820557
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,2,8192,1,0,8.94157346089681
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,2,8192,1,0,8.999034881591797
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,2,8192,128,0,0.20939199129740396
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,4,8192,8,0,2.3683199882507324
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,4,8192,4,0,4.550965309143066
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,4,8192,4,0,4.5467573801676435
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,4,8192,8,0,2.3720800081888833
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,4,8192,16,0,1.2873813311258953
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,4,8192,2,0,8.923189163208008
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,4,8192,2,0,9.031061172485352
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,4,8192,16,0,1.2935679753621419
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,4,8192,32,0,0.7498186429341634
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,4,8192,32,0,0.7463146845499674
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,4,8192,64,0,0.46865065892537433
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,4,8192,128,0,0.3488159974416097
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,4,8192,64,0,0.47483734289805096
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,4,8192,128,0,0.3492639859517415
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,4,8192,1,0,17.834341684977215
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,1,10240,4,0,1.734816074371338
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,1,10240,2,0,3.302149454752604
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,1,10240,4,0,1.7379147211710613
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,4,8192,1,0,17.9693120320638
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,1,10240,2,0,3.3331521352132163
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,1,10240,8,0,0.9492800235748291
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,1,10240,1,0,6.490863800048828
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,1,10240,8,0,0.9414827028910319
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,1,10240,1,0,6.494880040486653
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,1,10240,16,0,0.5620746612548828
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,1,10240,16,0,0.5432853301366171
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,1,10240,32,0,0.34460266431172687
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,1,10240,32,0,0.3503359953562419
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,1,10240,64,0,0.23817066351572672
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,1,10240,64,0,0.23588800430297852
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,1,10240,128,0,0.18091734250386557
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,1,10240,128,0,0.17626667022705078
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,2,10240,8,0,1.7767626444498699
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,2,10240,4,0,3.3613974253336587
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,2,10240,4,0,3.3377866744995117
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,2,10240,2,0,6.589770634969075
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,2,10240,2,0,6.609877268473308
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,2,10240,8,0,1.7786720593770344
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,2,10240,16,0,0.975600004196167
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,2,10240,16,0,0.967957337697347
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,2,10240,32,0,0.5760586659113566
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,2,10240,32,0,0.5851519902547201
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,2,10240,64,0,0.36849598089853924
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,2,10240,128,0,0.2786239981651306
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,2,10240,128,0,0.27937066555023193
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,2,10240,64,0,0.37215999762217206
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,2,10240,1,0,13.276847839355469
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,2,10240,1,0,13.1113650004069
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,1,12288,8,0,1.283509333928426
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,1,12288,4,0,2.351967970530192
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,1,12288,4,0,2.4080212910970054
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,1,12288,2,0,4.517951965332031
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,1,12288,2,0,4.537237485249837
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,1,12288,8,0,1.2802986303965251
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,1,12288,16,0,0.7510293324788412
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,1,12288,1,0,9.30787722269694
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,1,12288,32,0,0.4537706772486369
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,1,12288,16,0,0.7086826960245768
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,1,12288,64,0,0.2993226647377014
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,1,12288,32,0,0.4343573252360026
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,1,12288,1,0,9.113290786743164
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,1,12288,64,0,0.29339732726414997
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,1,12288,128,0,0.21467200915018717
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,1,12288,128,0,0.20512000719706217
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,2,12288,8,0,2.391157309214274
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,2,12288,8,0,2.3784640630086265
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,2,12288,4,0,4.603407859802246
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,2,12288,4,0,4.538453420003255
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,2,12288,16,0,1.2903520266215007
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,2,12288,2,0,9.06167984008789
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,2,12288,2,0,8.978378931681315
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,2,12288,16,0,1.291541337966919
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,2,12288,32,0,1.216655969619751
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,2,12288,32,0,0.7588799794514974
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,2,12288,64,0,0.4708053270975749
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,2,12288,64,0,0.4808906714121501
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,2,12288,128,0,0.3489973147710164
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,2,12288,128,0,0.3410186767578125
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,2,12288,1,0,18.28663508097331
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,2,12288,1,0,18.503957112630207
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,1,16384,8,0,2.0740373929341636
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,1,16384,4,0,3.856538772583008
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,1,16384,8,0,2.068570613861084
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,1,16384,4,0,3.821573257446289
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,1,16384,2,0,7.727877298990886
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,1,16384,16,0,1.1253493626912434
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,1,16384,2,0,7.7651627858479815
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,1,16384,16,0,1.1245120366414387
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,1,16384,32,0,0.6533013184865316
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,1,16384,32,0,0.6518400112787882
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,1,16384,64,0,0.44565868377685547
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,1,16384,64,0,0.43306132157643634
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,1,16384,128,0,0.3095146616299947
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,1,16384,128,0,0.2923840085665385
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,1,16384,1,0,15.861461639404297
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,1,16384,1,0,15.438549041748047
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,16,2,16384,8,0,3.9845921198527017
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,16,2,16384,8,0,4.006127993265788
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,32,2,16384,4,0,7.7304642995198565
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,32,2,16384,4,0,7.713162740071614
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,8,2,16384,16,0,2.079967975616455
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,4,2,16384,32,0,1.245365301767985
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,8,2,16384,16,0,2.1002987225850425
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,2,2,16384,64,0,0.7059146563212076
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,4,2,16384,32,0,1.2417279879252117
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,2,2,16384,64,0,0.7261386712392172
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,1,2,16384,128,0,0.4853920141855876
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,1,2,16384,128,0,0.49774932861328125
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,64,2,16384,2,0,15.928517659505209
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,64,2,16384,2,0,15.712149302164713
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flash_attn_mla,float16,float16,128,2,16384,1,0,31.696027119954426
VLLM,0.12.0,NVIDIA H200,context_mla,vllm_flashmla,float16,fp8,128,2,16384,1,0,31.676864624023438
