framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,1,16,1,0,0.18147200345993042
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,1,16,2,0,0.16130133469899496
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,1,16,8,0,0.10458133618036906
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,1,16,4,0,0.16172266999880472
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,1,16,4,0,0.1151626706123352
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,1,16,8,0,0.17167999347050986
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,1,16,2,0,0.17673067251841226
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,1,16,1,0,0.16189866264661154
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,1,16,16,0,0.23200533787409464
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,1,16,32,0,0.11469866832097371
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,1,16,16,0,0.10698133707046509
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,1,16,64,0,0.16876266400019327
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,1,16,64,0,0.2330933411916097
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,1,16,32,0,0.22458134094874063
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,1,16,128,0,0.13854933778444925
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,1,16,128,0,0.20815465847651163
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,2,16,1,0,0.1660319964090983
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,2,16,2,0,0.1660426656405131
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,2,16,2,0,0.22968000173568726
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,2,16,1,0,0.1939093271891276
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,2,16,4,0,0.1390720009803772
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,2,16,4,0,0.2339786688486735
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,2,16,16,0,0.16667733589808145
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,2,16,8,0,0.1651893357435862
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,2,16,32,0,0.1288266678651174
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,2,16,16,0,0.1756053368250529
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,2,16,8,0,0.16768000523249307
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,2,16,32,0,0.22952000300089517
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,2,16,64,0,0.16478400429089865
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,2,16,128,0,0.15996266404787698
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,2,16,64,0,0.2345013419787089
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,2,16,128,0,0.20961066087086996
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,4,16,1,0,0.2884426712989807
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,4,16,2,0,0.1604266663392385
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,4,16,1,0,0.24438399076461792
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,4,16,4,0,0.16460266709327698
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,4,16,2,0,0.2799786726633708
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,4,16,4,0,0.154831995566686
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,4,16,8,0,0.11674132943153381
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,4,16,8,0,0.1899413267771403
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,4,16,16,0,0.20721600453058878
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,4,16,16,0,0.13748799761136374
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,4,16,32,0,0.16986666123072305
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,4,16,32,0,0.22909333308537802
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,4,16,64,0,0.16098666191101074
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,4,16,64,0,0.22429333130518594
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,4,16,128,0,0.1688693364461263
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,4,16,128,0,0.2114880084991455
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,8,16,1,0,0.1288746694723765
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,8,16,1,0,0.18962132930755615
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,8,16,2,0,0.1710933248202006
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,8,16,4,0,0.1641333301862081
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,8,16,4,0,0.1771999994913737
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,8,16,2,0,0.23388266563415527
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,8,16,8,0,0.11239999532699585
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,8,16,8,0,0.24328533808390299
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,8,16,16,0,0.17260799805323282
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,8,16,16,0,0.17947200934092203
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,8,16,32,0,0.12463466326395671
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,8,16,128,0,0.20454933245976767
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,8,16,32,0,0.2278346618016561
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,8,16,64,0,0.16267200311024985
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,8,16,64,0,0.2266826629638672
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,8,16,128,0,0.16457600394884744
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,16,16,2,0,0.18171733617782593
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,16,16,1,0,0.24978133042653403
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,16,16,1,0,0.17249067624409994
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,16,16,2,0,0.23811733722686768
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,16,16,4,0,0.22404267390569052
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,16,16,4,0,0.26238399744033813
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,16,16,8,0,0.17033600807189941
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,16,16,8,0,0.23407999674479166
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,16,16,16,0,0.13828800121943155
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,16,16,16,0,0.2399946649869283
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,16,16,32,0,0.16681599617004395
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,16,16,64,0,0.135535995165507
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,16,16,32,0,0.2063360015551249
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,16,16,64,0,0.18507200479507446
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,16,16,128,0,0.1555519998073578
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,16,16,128,0,0.2037280003229777
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,32,16,1,0,0.45927464962005615
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,32,16,2,0,0.17629333337148032
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,32,16,1,0,0.20318400859832764
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,32,16,4,0,0.17108267545700073
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,32,16,2,0,0.27396267652511597
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,32,16,4,0,0.227674663066864
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,32,16,8,0,0.13275200128555298
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,32,16,8,0,0.23477333784103394
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,32,16,16,0,0.1578933298587799
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,32,16,16,0,0.2247520089149475
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,32,16,32,0,0.1681600014368693
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,32,16,64,0,0.32654933134714764
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,32,16,32,0,0.2207146684328715
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,32,16,64,0,0.1665226618448893
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,32,16,128,0,0.1609333356221517
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,32,16,128,0,0.16517333189646402
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,64,16,1,0,0.3336319923400879
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,64,16,2,0,0.1872373421986898
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,64,16,4,0,0.17538666725158691
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,64,16,1,0,0.860853354136149
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,64,16,4,0,0.31965333223342896
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,64,16,8,0,0.16201600432395935
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,64,16,8,0,0.24473067124684653
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,64,16,2,0,0.5089759826660156
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,64,16,16,0,0.17541333039601645
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,64,16,16,0,0.20277865727742514
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,64,16,32,0,0.10895466804504395
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,64,16,32,0,0.22772266467412314
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,64,16,64,0,0.17241599162419638
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,64,16,64,0,0.23565866549809775
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,64,16,128,0,0.17409600814183554
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,64,16,128,0,0.1906613310178121
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,128,16,1,0,0.6147093375523885
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,128,16,2,0,0.32822932799657184
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,128,16,4,0,0.6223839918772379
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,128,16,1,0,1.6697386105855305
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,128,16,2,0,0.9780853589375814
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,128,16,4,0,0.19187732537587485
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,128,16,8,0,0.16867733001708984
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,128,16,8,0,0.43085332711537677
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,128,16,16,0,0.16940265893936157
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,128,16,16,0,0.34135464827219647
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,128,16,32,0,0.16529066363970438
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,128,16,64,0,0.16622400283813477
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,128,16,32,0,0.3204853336016337
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,128,16,64,0,0.2701066732406616
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,128,16,128,0,0.1593546668688456
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,128,16,128,0,0.23469332853953043
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,256,16,4,0,0.3358773390452067
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,256,16,2,0,1.8959147135416667
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,256,16,2,0,0.6182399988174438
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,256,16,4,0,1.1933440367380779
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,256,16,8,0,0.1987946629524231
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,256,16,1,0,1.187930663426717
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,256,16,1,0,3.2612053553263345
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,256,16,8,0,0.8277599811553955
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,256,16,16,0,0.6689600149790446
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,256,16,32,0,0.16196800271670023
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,256,16,32,0,0.5714186827341715
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,256,16,64,0,0.15888532996177673
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,256,16,16,0,0.16512533028920492
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,1,32,1,0,0.11329600214958191
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,256,16,64,0,0.4864000082015991
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,1,32,1,0,0.18570667505264282
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,1,32,2,0,0.16514666875203451
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,256,16,128,0,0.16381866733233133
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,1,32,2,0,0.22363734245300293
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,256,16,128,0,0.4306346575419108
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,1,32,4,0,0.21996267636617026
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,1,32,4,0,0.16290133198102316
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,1,32,8,0,0.15714133779207864
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,1,32,16,0,0.1660373310248057
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,1,32,32,0,0.1609119971593221
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,1,32,8,0,0.22648000717163086
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,1,32,16,0,0.2309173345565796
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,1,32,32,0,0.23285333315531412
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,1,32,64,0,0.15921599666277567
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,1,32,64,0,0.19486399491628012
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,1,32,128,0,0.167738676071167
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,1,32,128,0,0.20238399505615234
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,2,32,1,0,0.12296533584594727
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,2,32,2,0,0.14497599999109903
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,2,32,1,0,0.22648000717163086
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,2,32,4,0,0.16023466984430948
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,2,32,2,0,0.21959465742111206
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,2,32,4,0,0.17244799931844076
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,2,32,8,0,0.16868799924850464
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,2,32,8,0,0.19419199228286743
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,2,32,16,0,0.15651733676592508
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,2,32,16,0,0.18616533279418945
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,2,32,32,0,0.14776000380516052
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,2,32,32,0,0.22244266668955484
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,2,32,64,0,0.16174399852752686
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,2,32,64,0,0.17069866259892783
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,2,32,128,0,0.1672746737798055
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,2,32,128,0,0.22639999787012735
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,4,32,1,0,0.223855992158254
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,4,32,2,0,0.21861332654953003
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,4,32,1,0,0.16780799627304077
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,4,32,4,0,0.14346667130788168
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,4,32,2,0,0.1648426651954651
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,4,32,4,0,0.22530666987101236
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,4,32,8,0,0.23521065711975098
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,4,32,8,0,0.16580800215403238
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,4,32,16,0,0.15587733189264932
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,4,32,64,0,0.12480533123016357
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,4,32,16,0,0.2222506602605184
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,4,32,32,0,0.15834133823712668
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,4,32,32,0,0.2424959937731425
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,4,32,64,0,0.21917865673700967
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,4,32,128,0,0.1639786660671234
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,4,32,128,0,0.2079040010770162
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,8,32,1,0,0.17117865880330405
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,8,32,2,0,0.16262400150299072
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,8,32,1,0,0.24945066372553507
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,8,32,2,0,0.22235200802485147
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,8,32,4,0,0.16648000478744507
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,8,32,4,0,0.19581333796183267
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,8,32,8,0,0.22528000672658285
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,8,32,8,0,0.1354986627896627
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,8,32,32,0,0.16481066743532816
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,8,32,16,0,0.16176000237464905
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,8,32,16,0,0.1948053240776062
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,8,32,32,0,0.2299786607424418
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,8,32,64,0,0.1649386684099833
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,8,32,128,0,0.16823999087015787
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,8,32,64,0,0.2269973357518514
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,8,32,128,0,0.15994667013486227
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,16,32,1,0,0.19337066014607748
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,16,32,2,0,0.1397706667582194
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,16,32,1,0,0.4599573214848836
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,16,32,4,0,0.16594133774439493
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,16,32,2,0,1.0667253335316975
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,16,32,4,0,0.2214826742808024
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,16,32,8,0,0.1646613379319509
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,16,32,8,0,0.23434134324391684
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,16,32,16,0,0.1699999968210856
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,16,32,16,0,0.22793066501617432
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,16,32,32,0,0.16264533003171286
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,16,32,64,0,0.15982932845751444
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,16,32,64,0,0.2459999918937683
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,16,32,32,0,0.8793973128000895
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,16,32,128,0,0.13685333728790283
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,16,32,128,0,0.1637279987335205
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,32,32,1,0,0.32825066645940143
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,32,32,1,0,0.8588106632232666
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,32,32,2,0,0.4936053355534871
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,32,32,2,0,0.1874986688296
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,32,32,4,0,0.16597333550453186
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,32,32,4,0,0.3051519989967346
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,32,32,8,0,0.1612266699473063
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,32,32,8,0,0.21076265970865884
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,32,32,16,0,0.1641546686490377
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,32,32,16,0,0.23667200406392416
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,32,32,32,0,0.16266666849454245
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,32,32,32,0,0.19337066014607748
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,32,32,64,0,0.16697599490483603
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,32,32,128,0,0.15916799505551657
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,32,32,64,0,0.2336853345235189
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,32,32,128,0,0.21175465981165567
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,64,32,2,0,0.32844799757003784
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,64,32,1,0,0.6112853288650513
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,64,32,4,0,0.18870933850606283
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,64,32,1,0,1.6511093775431316
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,64,32,2,0,0.9463786284128824
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,64,32,4,0,0.5947519938151041
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,64,32,8,0,0.1418186624844869
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,64,32,8,0,0.3999733527501424
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,64,32,16,0,0.17319466670354208
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,64,32,16,0,0.31934932867685956
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,64,32,32,0,0.1655893325805664
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,64,32,64,0,0.3258826732635498
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,64,32,32,0,0.27958399057388306
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,64,32,64,0,0.25994133949279785
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,64,32,128,0,0.152319997549057
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,64,32,128,0,0.2225066622098287
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,128,32,1,0,1.1672213077545166
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,128,32,4,0,0.33420801162719727
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,128,32,2,0,0.6164266665776571
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,128,32,2,0,1.8338932991027832
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,128,32,4,0,1.142095963160197
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,128,32,8,0,0.7846293449401855
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,128,32,1,0,3.2718025843302407
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,128,32,8,0,0.19515732924143472
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,128,32,16,0,0.13792533675829569
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,128,32,16,0,0.5924853483835856
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,128,32,32,0,0.5107413530349731
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,128,32,32,0,0.1651893357435862
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,128,32,64,0,0.1644000013669332
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,128,32,64,0,0.4733653465906779
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,128,32,128,0,0.16271467010180155
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,128,32,128,0,0.40268266201019287
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,256,32,1,0,2.2604853312174478
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,256,32,2,0,1.1697866916656494
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,256,32,4,0,0.6189279953638712
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,256,32,8,0,0.34514133135477704
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,256,32,4,0,2.236405372619629
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,256,32,2,0,3.5984373092651367
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,256,32,8,0,1.5240747133890789
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,256,32,1,0,6.355871836344401
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,256,32,16,0,0.21014932791392008
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,256,32,32,0,0.13692800203959146
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,256,32,16,0,1.1833226680755615
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,256,32,32,0,0.9968640009562174
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,256,32,64,0,0.15972800056139627
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,1,64,1,0,0.1628213326136271
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,256,32,128,0,0.16641066471735635
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,1,64,1,0,0.2269066572189331
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,256,32,64,0,0.9057013193766276
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,1,64,2,0,0.1563093364238739
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,1,64,2,0,0.17522132396697998
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,256,32,128,0,0.7725653648376465
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,1,64,4,0,0.2188213268915812
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,1,64,4,0,0.12296533584594727
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,1,64,8,0,0.16706132888793945
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,1,64,8,0,0.23016534248987833
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,1,64,16,0,0.1623093287150065
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,1,64,16,0,0.22243199745814005
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,1,64,32,0,0.2341653307278951
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,1,64,32,0,0.1602773368358612
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,1,64,64,0,0.16051733493804932
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,1,64,64,0,0.2188053329785665
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,1,64,128,0,0.16831467549006143
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,1,64,128,0,0.2013546625773112
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,2,64,1,0,0.11851200461387634
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,2,64,1,0,0.167087992032369
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,2,64,2,0,0.12716799974441528
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,2,64,2,0,0.17454934120178223
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,2,64,4,0,0.1569866637388865
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,2,64,4,0,0.18879467248916626
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,2,64,16,0,0.18127999703089395
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,2,64,8,0,0.1592586636543274
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,2,64,8,0,0.2321066657702128
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,2,64,16,0,0.15995200475056967
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,2,64,32,0,0.16059733430544534
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,2,64,32,0,0.22545599937438965
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,2,64,64,0,0.1658560037612915
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,2,64,64,0,0.2226346731185913
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,2,64,128,0,0.16396266222000122
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,2,64,128,0,0.20691200097401938
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,4,64,1,0,0.11582400401433308
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,4,64,2,0,0.23372799158096313
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,4,64,1,0,0.25438932577768963
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,4,64,2,0,0.16425599654515585
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,4,64,4,0,0.13099199533462524
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,4,64,4,0,0.22265066703160605
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,4,64,8,0,0.2205866575241089
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,4,64,8,0,0.15717333555221558
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,4,64,16,0,0.16340800126393637
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,4,64,16,0,0.22548800706863403
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,4,64,32,0,0.16408532857894897
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,4,64,32,0,0.23706134160359701
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,4,64,64,0,0.12717866897583008
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,4,64,128,0,0.21875733137130737
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,4,64,64,0,0.22591465711593628
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,4,64,128,0,0.1581653356552124
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,8,64,1,0,0.1879253387451172
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,8,64,1,0,0.46084801355997723
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,8,64,2,0,0.1358560025691986
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,8,64,4,0,0.1665173371632894
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,8,64,2,0,0.25516267617543537
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,8,64,4,0,0.19121599197387695
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,8,64,8,0,0.15945067008336386
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,8,64,8,0,0.2268106738726298
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,8,64,16,0,0.1622773309548696
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,8,64,32,0,0.23341333866119385
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,8,64,16,0,0.18503999710083008
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,8,64,32,0,0.1492853363355001
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,8,64,64,0,0.16182399789492288
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,8,64,128,0,0.15822933117548624
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,8,64,64,0,0.22404267390569052
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,8,64,128,0,0.2148533264795939
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,16,64,1,0,0.33313600222269696
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,16,64,2,0,0.18452799320220947
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,16,64,1,0,0.8803573449452718
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,16,64,8,0,0.1486186683177948
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,16,64,2,0,0.4922293424606323
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,16,64,4,0,0.16731733083724976
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,16,64,4,0,0.2921280066172282
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,16,64,32,0,0.12748799721399942
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,16,64,8,0,0.2401706576347351
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,16,64,16,0,0.15995200475056967
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,16,64,16,0,0.23998934030532837
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,16,64,32,0,0.22501333554585776
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,16,64,64,0,0.1667626698811849
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,16,64,128,0,0.20201599597930908
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,16,64,64,0,0.18641066551208496
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,16,64,128,0,0.16015467047691345
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,32,64,1,0,0.6102399826049805
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,32,64,2,0,0.32868266105651855
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,32,64,2,0,0.9379093647003174
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,32,64,4,0,0.18939733505249023
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,32,64,1,0,1.6608586311340332
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,32,64,4,0,0.5715200106302897
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,32,64,8,0,0.3774293263753255
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,32,64,8,0,0.1674720048904419
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,32,64,16,0,0.2860640088717143
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,32,64,16,0,0.16777066389719644
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,32,64,32,0,0.16025066375732422
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,32,64,32,0,0.3343466520309448
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,32,64,64,0,0.1620373328526815
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,32,64,64,0,0.21842666467030844
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,32,64,128,0,0.15451199809710184
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,32,64,128,0,0.22282665967941284
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,64,64,2,0,0.6104906797409058
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,64,64,1,0,1.163162628809611
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,64,64,4,0,0.32847466071446735
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,64,64,8,0,0.19368533293406168
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,64,64,4,0,1.0943840344746907
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,64,64,8,0,0.7298346360524496
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,64,64,2,0,1.8278080622355144
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,64,64,1,0,3.2412373224894204
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,64,64,16,0,0.13859200477600098
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,64,64,16,0,0.5409173170725504
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,64,64,32,0,0.16081066926320395
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,64,64,64,0,0.1321386694908142
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,64,64,32,0,0.44234665234883624
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,64,64,64,0,0.4139466683069865
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,64,64,128,0,0.16965866088867188
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,64,64,128,0,0.36951998869578045
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,128,64,2,0,1.170090675354004
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,128,64,1,0,2.2632853190104165
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,128,64,4,0,0.6227999925613403
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,128,64,8,0,0.34308799107869464
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,128,64,4,0,2.1288906733194985
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,128,64,2,0,3.5797974268595376
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,128,64,16,0,0.20043200254440308
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,128,64,8,0,1.417418638865153
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,128,64,1,0,6.384927749633789
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,128,64,32,0,0.1302720010280609
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,128,64,16,0,1.0553066730499268
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,128,64,32,0,0.8687146504720052
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,128,64,64,0,0.1673439939816793
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,128,64,128,0,0.16114133596420288
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,128,64,128,0,0.7115519841512045
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,128,64,64,0,0.7557013034820557
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,256,64,4,0,1.1874453226725261
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,256,64,2,0,2.2864960034688315
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,256,64,1,0,4.456021308898926
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,256,64,8,0,0.6455519994099935
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,256,64,2,0,7.083893458048503
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,256,64,4,0,4.198207855224609
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,256,64,16,0,0.3702346483866374
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,256,64,32,0,0.23123733202616373
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,256,64,8,0,2.766079902648926
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,256,64,16,0,2.056725343068441
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,256,64,1,0,12.67691167195638
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,1,128,1,0,0.17473065853118896
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,1,128,1,0,0.16539200146993002
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,256,64,64,0,0.17300266027450562
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,256,64,32,0,1.7074987093607585
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,256,64,64,0,1.5273973147074382
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,1,128,2,0,0.12839466333389282
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,256,64,128,0,0.16289599736531576
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,256,64,128,0,1.4072106679280598
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,1,128,4,0,0.1597599983215332
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,1,128,2,0,0.22456000248591104
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,1,128,4,0,0.22281599044799805
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,1,128,8,0,0.12959999839464822
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,1,128,8,0,0.24145066738128662
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,1,128,32,0,0.22059200207392374
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,1,128,16,0,0.16568000117937723
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,1,128,16,0,0.2229599952697754
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,1,128,64,0,0.18650666872660318
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,1,128,32,0,0.1702079971631368
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,1,128,64,0,0.15754133462905884
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,1,128,128,0,0.16141866644223532
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,1,128,128,0,0.8689066569010416
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,2,128,1,0,0.1675306757291158
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,2,128,2,0,0.16687999169031778
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,2,128,1,0,0.26014933983484906
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,2,128,2,0,0.2121280034383138
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,2,128,4,0,0.24692267179489136
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,2,128,4,0,0.21985600392023721
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,2,128,8,0,0.16087466478347778
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,2,128,8,0,0.2251946727434794
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,2,128,16,0,0.159360001484553
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,2,128,16,0,0.22697067260742188
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,2,128,32,0,0.15749333302179971
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,2,128,64,0,0.1336426635583242
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,2,128,32,0,0.3271733323733012
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,2,128,64,0,0.21361066897710165
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,2,128,128,0,0.1726026733716329
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,2,128,128,0,0.20755734046300253
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,4,128,2,0,0.1653279960155487
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,4,128,1,0,0.2323466738065084
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,4,128,1,0,0.47870934009552
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,4,128,2,0,0.2536799907684326
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,4,128,4,0,0.2210879921913147
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,4,128,4,0,0.16581867138544717
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,4,128,8,0,0.13075199723243713
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,4,128,8,0,0.22822932402292886
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,4,128,32,0,0.22990934054056802
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,4,128,16,0,0.12968533237775168
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,4,128,16,0,0.18610666195551553
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,4,128,64,0,0.23172267278035483
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,4,128,32,0,0.16569599509239197
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,4,128,64,0,0.15850667158762613
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,4,128,128,0,0.12793599565823874
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,4,128,128,0,0.20002667109171549
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,8,128,1,0,0.3410293261210124
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,8,128,2,0,0.1914400060971578
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,8,128,4,0,0.13662933309872946
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,8,128,2,0,0.4833759864171346
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,8,128,1,0,0.8934079806009928
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,8,128,4,0,0.6109280188878378
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,8,128,8,0,0.15899200240770975
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,8,128,8,0,0.2290346622467041
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,8,128,16,0,0.16385599970817566
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,8,128,64,0,0.16380799810091654
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,8,128,16,0,0.2159093419710795
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,8,128,32,0,0.15585600336392721
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,8,128,32,0,0.236735999584198
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,8,128,128,0,0.15985600153605142
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,8,128,64,0,0.2238666613896688
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,8,128,128,0,0.2080693244934082
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,16,128,1,0,0.6320106585820516
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,16,128,2,0,0.3409866491953532
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,16,128,4,0,0.19158933560053507
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,16,128,2,0,0.8931573232014974
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,16,128,1,0,1.71998929977417
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,16,128,4,0,0.5177866617838541
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,16,128,8,0,0.15430933237075806
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,16,128,8,0,0.32894400755564374
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,16,128,16,0,0.16901866594950357
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,16,128,16,0,0.23476799329121908
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,16,128,32,0,0.13281066219011942
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,16,128,32,0,0.23620800177256265
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,16,128,64,0,0.15890666842460632
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,16,128,64,0,0.18672533830006918
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,16,128,128,0,0.1630346675713857
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,16,128,128,0,0.2622239987055461
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,32,128,1,0,1.206437349319458
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,32,128,2,0,0.6282399892807007
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,32,128,4,0,0.33816532293955487
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,32,128,2,0,1.7089600563049316
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,32,128,8,0,0.1982240080833435
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,32,128,4,0,0.9942826430002848
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,32,128,1,0,3.3591893513997397
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,32,128,8,0,0.6311039924621582
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,32,128,16,0,0.16806399822235107
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,32,128,32,0,0.16218666235605875
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,32,128,16,0,0.43727465470631915
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,32,128,64,0,0.16064533591270447
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,32,128,32,0,0.9739573001861572
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,32,128,64,0,0.3123786648114522
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,32,128,128,0,0.17393600940704346
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,32,128,128,0,0.26703999439875287
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,64,128,2,0,1.2070133686065674
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,64,128,1,0,2.3351573944091797
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,64,128,8,0,0.34724267323811847
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,64,128,4,0,0.6329066753387451
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,64,128,4,0,1.9298186302185059
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,64,128,2,0,3.357599894205729
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,64,128,16,0,0.2098133365313212
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,64,128,8,0,1.2136320273081462
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,64,128,16,0,0.849280039469401
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,64,128,32,0,0.16484799981117249
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,64,128,1,0,6.625365575154622
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,64,128,32,0,0.6655893325805664
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,64,128,64,0,0.16862932840983072
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,64,128,64,0,0.5698026816050211
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,64,128,128,0,0.1739520033200582
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,64,128,128,0,0.5125226577123007
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,128,128,4,0,1.218773365020752
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,128,128,8,0,0.6554826498031616
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,128,128,2,0,2.370357354482015
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,128,128,1,0,4.599823951721191
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,128,128,8,0,2.3618133862813315
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,128,128,4,0,3.7918294270833335
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,128,128,16,0,0.37616535027821857
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,128,128,2,0,6.624352137247722
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,128,128,32,0,0.23155200481414795
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,128,128,16,0,1.6487147013346355
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,128,128,32,0,1.2890133062998455
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,128,128,64,0,0.15901866555213928
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,128,128,128,0,0.16859199603398642
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,128,128,64,0,1.1084213256835938
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,128,128,128,0,0.9958720207214355
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,128,128,1,0,13.119115193684896
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,256,128,4,0,2.374234676361084
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,256,128,8,0,1.2564373016357422
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,256,128,2,0,4.6433760325113935
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,256,128,1,0,9.098533630371094
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,256,128,4,0,7.513738632202148
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,256,128,8,0,4.663637479146321
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,256,128,32,0,0.4295733372370402
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,256,128,16,0,0.7053013642628988
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,256,128,16,0,3.249749183654785
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,256,128,64,0,0.2898400028546651
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,256,128,2,0,13.130746205647787
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,256,128,32,0,2.5330452919006348
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,1,256,1,0,0.16995733976364136
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,256,128,128,0,0.20427199204762778
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,256,128,64,0,2.200991948445638
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,1,256,2,0,0.16553599635759988
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,1,256,1,0,0.17568000157674155
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,1,256,2,0,0.16617066661516824
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,1,256,4,0,0.16100800037384033
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,256,128,128,0,1.979098637898763
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,1,256,4,0,0.15710933009783426
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,1,256,8,0,0.16447466611862183
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,1,256,8,0,0.1641386648019155
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,1,256,16,0,0.16385599970817566
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,1,256,16,0,0.16127467155456543
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,1,256,32,0,0.12682132919629416
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,1,256,32,0,0.17005334297815958
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,1,256,64,0,0.15730667114257812
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,1,256,64,0,0.16235733032226562
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,256,128,1,0,24.48357899983724
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,1,256,128,0,0.16261866688728333
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,1,256,128,0,0.15970666209856668
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,2,256,2,0,0.17046932379404703
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,2,256,1,0,0.2071626583735148
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,2,256,2,0,0.1304213305314382
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,2,256,4,0,0.1290826698144277
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,2,256,8,0,0.16107733050982156
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,2,256,1,0,0.17612799008687338
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,2,256,4,0,0.1588640014330546
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,2,256,8,0,0.1665066679318746
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,2,256,16,0,0.1660373310248057
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,2,256,64,0,0.12692800164222717
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,2,256,16,0,0.14049599568049112
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,2,256,32,0,0.15988799929618835
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,2,256,32,0,0.1418880025545756
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,2,256,64,0,0.1606933375199636
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,2,256,128,0,0.1593119998772939
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,2,256,128,0,0.2657546599706014
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,4,256,1,0,0.36214931805928546
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,4,256,1,0,0.30084266265233356
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,4,256,2,0,0.20374399423599243
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,4,256,2,0,0.1699999968210856
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,4,256,4,0,0.17014400164286295
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,4,256,4,0,0.16365866859753928
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,4,256,8,0,0.17010132471720377
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,4,256,8,0,0.14641066392262778
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,4,256,16,0,0.1322773297627767
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,4,256,16,0,0.14056000113487244
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,4,256,32,0,0.132341335217158
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,4,256,32,0,0.172106663386027
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,4,256,64,0,0.16809600591659546
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,4,256,64,0,0.17038400967915854
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,4,256,128,0,0.15341867009798685
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,4,256,128,0,0.15254400173823038
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,8,256,1,0,0.6723360220591227
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,8,256,2,0,0.36190398534138996
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,8,256,1,0,0.4850613276163737
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,8,256,2,0,0.27082133293151855
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,8,256,4,0,0.20611733198165894
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,8,256,4,0,0.17324266831080118
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,8,256,8,0,0.16416000326474509
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,8,256,8,0,0.1551093359788259
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,8,256,16,0,0.1607146660486857
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,8,256,16,0,0.1390773355960846
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,8,256,32,0,0.21987199783325195
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,8,256,32,0,0.16888533035914102
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,8,256,64,0,0.14429866274197897
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,8,256,64,0,0.1328480045000712
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,8,256,128,0,0.16499200463294983
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,8,256,128,0,0.16995733976364136
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,16,256,1,0,1.268229325612386
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,16,256,2,0,0.6732426484425863
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,16,256,2,0,0.4859040180842082
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,16,256,4,0,0.3590720097223918
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,16,256,1,0,0.9077653090159098
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,16,256,4,0,0.26868265867233276
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,16,256,8,0,0.20511466264724731
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,16,256,8,0,0.17476266622543335
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,16,256,16,0,0.17149867614110312
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,16,256,16,0,0.16152000427246094
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,16,256,32,0,0.1609280010064443
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,16,256,32,0,0.16473600268363953
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,16,256,64,0,0.16553599635759988
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,16,256,64,0,0.1498240033785502
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,16,256,128,0,0.16597333550453186
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,16,256,128,0,0.1556053360303243
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,32,256,2,0,1.280197302500407
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,32,256,4,0,0.6794133186340332
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,32,256,1,0,1.7756373087565105
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,32,256,2,0,0.9185919761657715
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,32,256,1,0,2.4801012674967446
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,32,256,4,0,0.4969013532002767
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,32,256,8,0,0.3649653196334839
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,32,256,8,0,0.28044267495473224
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,32,256,16,0,0.2166773279507955
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,32,256,16,0,0.17226133743921915
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,32,256,32,0,0.2797866662343343
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,32,256,32,0,0.1685439944267273
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,32,256,64,0,0.16193067034085593
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,32,256,128,0,0.1991680065790812
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,32,256,64,0,0.17038933436075845
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,32,256,128,0,0.8489440282185873
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,64,256,4,0,1.2992746829986572
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,64,256,2,0,1.7776212692260742
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,64,256,4,0,0.9418133099873861
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,64,256,8,0,0.6913332939147949
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,64,256,2,0,2.504490693410238
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,64,256,1,0,3.474442799886068
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,64,256,8,0,0.5147626797358195
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,64,256,1,0,4.903562545776367
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,64,256,16,0,0.3912586768468221
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,64,256,16,0,0.3048959970474243
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,64,256,64,0,0.16987733046213785
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,64,256,64,0,0.13904000322024027
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,64,256,32,0,0.24450665712356567
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,64,256,32,0,0.19637866814931235
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,64,256,128,0,0.15938666462898254
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,64,256,128,0,0.1588320036729177
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,128,256,4,0,2.5611732800801597
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,128,256,8,0,0.9841866493225098
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,128,256,8,0,1.3391040166219075
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,128,256,2,0,3.5116907755533853
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,128,256,4,0,1.8244959513346355
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,128,256,2,0,4.961653391520183
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,128,256,1,0,6.895008087158203
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,128,256,16,0,0.7430133024851481
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,128,256,16,0,0.5681546529134115
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,128,256,32,0,0.4487626552581787
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,128,256,1,0,9.691375732421875
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,128,256,32,0,0.3581226666768392
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,128,256,64,0,0.29954665899276733
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,128,256,64,0,0.24912534157435098
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,128,256,128,0,0.21014400323232016
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,1,512,1,0,0.23342400789260864
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,1,512,1,0,0.15424533685048422
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,128,256,128,0,0.19697066148122153
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,1,512,2,0,0.1564853290716807
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,1,512,2,0,0.16779200236002603
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,1,512,4,0,0.12133333086967468
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,1,512,4,0,0.1621226668357849
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,1,512,8,0,0.12965333461761475
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,1,512,8,0,0.1586133340994517
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,1,512,16,0,0.16522666811943054
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,1,512,16,0,0.15708266695340475
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,1,512,32,0,0.163482666015625
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,1,512,32,0,0.13376532991727194
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,1,512,64,0,0.1297920048236847
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,1,512,64,0,0.16029333074887595
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,1,512,128,0,0.15921066204706827
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,1,512,128,0,0.15988799929618835
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,2,512,2,0,0.228383998076121
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,2,512,1,0,0.4163573185602824
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,2,512,2,0,0.1746079921722412
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,2,512,8,0,0.1671733260154724
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,2,512,1,0,0.2682346701622009
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,2,512,4,0,0.16180800398190817
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,2,512,4,0,0.16750933726628622
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,2,512,8,0,0.15555733442306519
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,2,512,16,0,0.12878400087356567
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,2,512,16,0,0.2376906673113505
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,2,512,32,0,0.16470932960510254
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,2,512,32,0,0.1362933317820231
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,2,512,64,0,0.15973333517710367
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,2,512,64,0,0.13218667109807333
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,2,512,128,0,0.16267733772595724
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,2,512,128,0,0.16007999579111734
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,4,512,2,0,0.4022879997889201
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,4,512,1,0,0.7636000315348307
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,4,512,2,0,0.27001599470774335
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,4,512,4,0,0.22748800118764242
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,4,512,1,0,0.4970080057779948
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,4,512,4,0,0.16890132427215576
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,4,512,8,0,0.1376586655775706
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,4,512,8,0,0.15985066692034403
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,4,512,16,0,0.1283626655737559
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,4,512,16,0,0.16182399789492288
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,4,512,32,0,0.16473066806793213
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,4,512,64,0,0.16567466656366983
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,4,512,32,0,0.17258665959040323
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,4,512,64,0,0.16219199697176614
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,4,512,128,0,0.1611840029557546
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,4,512,128,0,0.1569493313630422
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,8,512,2,0,0.754202683766683
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,8,512,2,0,0.4951680103937785
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,8,512,1,0,1.4567999839782715
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,8,512,4,0,0.4014293352762858
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,8,512,4,0,0.277946670850118
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,8,512,8,0,0.22882133722305298
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,8,512,8,0,0.17671465873718262
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,8,512,16,0,0.1728960076967875
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,8,512,16,0,0.16939733425776163
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,8,512,1,0,0.9455946286519369
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,8,512,32,0,0.16426666577657065
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,8,512,32,0,0.12352533141771953
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,8,512,64,0,0.14441066980361938
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,8,512,64,0,0.16582399606704712
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,8,512,128,0,0.10708266496658325
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,8,512,128,0,0.15506666898727417
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,16,512,2,0,1.4432479540507
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,16,512,2,0,0.963530699412028
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,16,512,1,0,1.8210080464680989
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,16,512,1,0,2.8416639963785806
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,16,512,4,0,0.5061386823654175
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,16,512,4,0,0.7554720242818197
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,16,512,8,0,0.2895946701367696
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,16,512,8,0,0.41074132919311523
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,16,512,32,0,0.15744533141454062
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,16,512,16,0,0.18307733535766602
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,16,512,16,0,0.2630239923795064
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,16,512,32,0,0.1647040049235026
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,16,512,64,0,0.1604106624921163
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,16,512,64,0,0.16448000073432922
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,16,512,128,0,0.1523360013961792
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,16,512,128,0,0.15051733454068503
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,32,512,2,0,1.856287956237793
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,32,512,2,0,2.8273226420084634
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,32,512,1,0,3.562191963195801
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,32,512,4,0,2.1325012842814126
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,32,512,8,0,0.783023993174235
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,32,512,8,0,0.5306719938913981
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,32,512,4,0,0.9623253345489502
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,32,512,1,0,5.5996748606363935
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,32,512,16,0,0.48470401763916016
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,32,512,16,0,0.3128906687100728
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,32,512,32,0,0.29157867034276325
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,32,512,32,0,0.20339200894037882
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,32,512,64,0,0.19501866896947226
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,32,512,128,0,0.16541866461435953
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,32,512,64,0,0.16411200165748596
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,32,512,128,0,0.15018666783968607
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,64,512,4,0,1.8722772598266602
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,64,512,8,0,1.506992022196452
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,64,512,4,0,2.880832036336263
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,64,512,2,0,3.6040852864583335
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,64,512,2,0,5.605248133341472
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,64,512,8,0,1.0140266418457031
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,64,512,1,0,7.0339202880859375
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,64,512,16,0,0.577733318010966
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,64,512,32,0,0.5400053262710571
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,64,512,16,0,0.973861296971639
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,64,512,32,0,0.36419200897216797
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,64,512,64,0,0.3470666805903117
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,64,512,1,0,11.138751983642578
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,64,512,64,0,0.25517332553863525
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,64,512,128,0,0.24848532676696777
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,64,512,128,0,0.20240533351898193
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,1,1024,2,0,0.17667200167973837
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,1,1024,1,0,0.29181333382924396
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,1,1024,1,0,0.2918986678123474
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,1,1024,4,0,0.8228106498718262
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,1,1024,2,0,0.17562667528788248
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,1,1024,4,0,0.15753066539764404
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,1,1024,8,0,0.15149866541226706
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,1,1024,8,0,0.15755200386047363
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,1,1024,16,0,0.1711733341217041
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,1,1024,16,0,0.14459199706713358
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,1,1024,32,0,0.16771199305852255
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,1,1024,32,0,0.6229333480199178
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,1,1024,64,0,0.16827199856440225
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,1,1024,64,0,0.1756640076637268
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,1,1024,128,0,0.1344053347905477
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,1,1024,128,0,0.14193600416183472
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,2,1024,1,0,0.584389328956604
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,2,1024,1,0,0.5357280174891154
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,2,1024,2,0,0.5871893167495728
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,2,1024,2,0,0.2973066568374634
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,2,1024,4,0,0.18081599473953247
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,2,1024,8,0,0.13803733388582864
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,2,1024,16,0,0.1339413324991862
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,2,1024,4,0,0.17724267641703287
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,2,1024,8,0,0.14426666498184204
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,2,1024,16,0,0.14618666966756186
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,2,1024,32,0,0.13646933436393738
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,2,1024,32,0,0.1721173326174418
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,2,1024,128,0,0.18729599316914877
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,2,1024,64,0,0.168229341506958
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,2,1024,128,0,0.16729066769282022
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,2,1024,64,0,0.17430933316548666
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,4,1024,1,0,1.0219199657440186
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,4,1024,2,0,0.5414026578267416
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,4,1024,1,0,1.0192373593648274
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,4,1024,2,0,0.5465386708577474
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,4,1024,4,0,0.2998666763305664
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,4,1024,4,0,0.30244266986846924
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,4,1024,8,0,0.18066666523615518
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,4,1024,8,0,0.19393599033355713
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,4,1024,16,0,0.2617119948069255
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,4,1024,16,0,0.14312533537546793
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,4,1024,32,0,0.16689066092173258
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,4,1024,32,0,0.17217600345611572
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,4,1024,64,0,0.17014400164286295
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,4,1024,64,0,0.17898666858673096
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,4,1024,128,0,0.17125866810480753
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,4,1024,128,0,0.2924906611442566
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,8,1024,2,0,1.0309120019276936
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,8,1024,1,0,1.9887733459472656
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,8,1024,2,0,1.02675199508667
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,8,1024,4,0,0.5530453523000082
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,8,1024,4,0,0.5507306655248007
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,8,1024,8,0,0.30992533763249713
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,8,1024,1,0,1.9921387036641438
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,8,1024,8,0,0.3128319978713989
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,8,1024,16,0,0.2092319925626119
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,8,1024,16,0,0.20774400234222412
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,8,1024,64,0,0.13617066542307535
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,8,1024,32,0,0.17100799083709717
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,8,1024,32,0,0.17152533928553262
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,8,1024,128,0,0.17422932386398315
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,8,1024,64,0,0.1734559933344523
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,8,1024,128,0,0.17313067118326822
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,16,1024,2,0,1.9906667073567708
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,16,1024,8,0,0.5722346703211466
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,16,1024,4,0,1.0521706740061443
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,16,1024,4,0,1.0548693339029949
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,16,1024,2,0,2.015066623687744
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,16,1024,1,0,3.895653406778971
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,16,1024,8,0,0.5794453223546346
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,16,1024,1,0,3.893066724141439
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,16,1024,16,0,0.3624266783396403
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,16,1024,16,0,0.3640799919764201
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,16,1024,32,0,0.23261332511901855
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,16,1024,32,0,0.23467199007670084
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,16,1024,64,0,0.17357333501180014
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,16,1024,64,0,0.1763733426729838
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,16,1024,128,0,0.16536532839139303
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,16,1024,128,0,0.12180266777674358
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,32,1024,4,0,2.0428640047709146
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,32,1024,8,0,1.1019360224405925
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,32,1024,2,0,3.9387839635213218
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,32,1024,8,0,1.1013279755910237
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,32,1024,4,0,2.0385279655456543
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,32,1024,2,0,3.9556268056233725
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,32,1024,16,0,0.679423967997233
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,32,1024,16,0,0.6782613595326742
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,32,1024,1,0,7.725573221842448
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,32,1024,32,0,0.4145066738128662
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,32,1024,32,0,0.415450652440389
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,32,1024,1,0,7.707802454630534
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,32,1024,64,0,0.28650667270024616
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,32,1024,128,0,0.220085342725118
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,32,1024,64,0,0.8781226476033529
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,32,1024,128,0,0.21772799889246622
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,1,1536,2,0,0.2532586654027303
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,1,1536,1,0,0.9094346364339193
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,1,1536,1,0,0.45236265659332275
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,1,1536,2,0,0.2526080012321472
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,1,1536,8,0,0.15683733423550925
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,1,1536,4,0,0.16411200165748596
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,1,1536,4,0,0.17122133572896323
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,1,1536,8,0,0.1602666676044464
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,1,1536,16,0,0.17798399925231934
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,1,1536,16,0,0.1680906613667806
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,1,1536,32,0,0.17519466082255045
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,1,1536,32,0,0.17017600933710733
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,1,1536,64,0,0.16313599546750387
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,1,1536,64,0,0.17481066783269247
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,1,1536,128,0,0.17788799603780112
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,1,1536,128,0,0.10858133435249329
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,2,1536,1,0,0.8480640252431234
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,2,1536,1,0,0.8448426723480225
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,2,1536,2,0,0.4556533495585124
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,2,1536,4,0,0.2701333363850911
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,2,1536,4,0,0.2602506677309672
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,2,1536,2,0,0.4576266606648763
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,2,1536,8,0,0.16709333658218384
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,2,1536,16,0,0.1774186690648397
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,2,1536,8,0,0.17221866051355997
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,2,1536,16,0,0.1679146687189738
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,2,1536,32,0,0.32396266857783
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,2,1536,32,0,0.1750346620877584
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,2,1536,64,0,0.16821332772572836
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,2,1536,128,0,0.17683732509613037
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,2,1536,64,0,0.17469867070515951
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,2,1536,128,0,0.17805866400400797
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,4,1536,1,0,1.6248373985290527
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,4,1536,2,0,0.8498613039652506
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,4,1536,2,0,0.8483200073242188
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,4,1536,1,0,1.624575932820638
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,4,1536,4,0,0.46082135041554767
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,4,1536,4,0,0.4678933223088582
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,4,1536,8,0,0.26604266961415607
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,4,1536,8,0,0.2680213252703349
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,4,1536,16,0,0.1832480033238729
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,4,1536,16,0,0.18628267447153726
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,4,1536,32,0,0.1747679909070333
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,4,1536,32,0,0.1760693391164144
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,4,1536,64,0,0.16491732994715372
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,4,1536,64,0,0.17783466974894205
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,4,1536,128,0,0.16518400112787882
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,4,1536,128,0,0.17124799887339273
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,8,1536,4,0,0.8690133094787598
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,8,1536,1,0,3.2282613118489585
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,8,1536,8,0,0.47889065742492676
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,8,1536,4,0,0.8720106283823649
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,8,1536,2,0,1.640501340230306
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,8,1536,8,0,0.4789546728134155
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,8,1536,1,0,3.2152427037556968
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,8,1536,2,0,1.646549383799235
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,8,1536,16,0,0.30476266145706177
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,8,1536,16,0,0.305567999680837
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,8,1536,32,0,0.19709332784016928
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,8,1536,32,0,0.1970133384068807
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,8,1536,64,0,0.1657813290754954
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,8,1536,64,0,0.21979200839996338
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,8,1536,128,0,0.17208532492319742
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,8,1536,128,0,0.18202133973439535
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,16,1536,4,0,1.6995360056559246
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,16,1536,4,0,1.6801546414693196
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,16,1536,2,0,3.2207892735799155
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,16,1536,8,0,0.9133919874827067
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,16,1536,2,0,3.2435038884480796
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,16,1536,1,0,6.304410934448242
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,16,1536,8,0,0.9107893308003744
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,16,1536,1,0,6.347551981608073
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,16,1536,16,0,0.5579893191655477
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,16,1536,16,0,0.5584693352381388
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,16,1536,32,0,0.3473493258158366
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,16,1536,64,0,0.23736000061035156
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,16,1536,32,0,0.34492266178131104
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,16,1536,64,0,0.23777600129445395
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,16,1536,128,0,0.18594133853912354
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,16,1536,128,0,0.18392000595728555
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,1,2048,1,0,0.6340533494949341
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,1,2048,2,0,0.34700798988342285
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,1,2048,1,0,0.6449333429336548
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,1,2048,2,0,0.8581173419952393
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,1,2048,4,0,0.6073333422342936
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,1,2048,4,0,0.20848000049591064
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,1,2048,8,0,0.8090399901072184
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,1,2048,32,0,0.16330666343371072
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,1,2048,16,0,0.16201066970825195
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,1,2048,8,0,0.16474133729934692
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,1,2048,64,0,0.16239466269810995
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,1,2048,16,0,0.3646293481190999
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,1,2048,32,0,0.7021013100941976
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,1,2048,64,0,0.17362133661905924
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,1,2048,128,0,0.16207999984423319
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,1,2048,128,0,0.16763200362523398
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,2,2048,2,0,0.6349600156148275
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,2,2048,1,0,1.209887981414795
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,2,2048,1,0,1.2083306312561035
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,2,2048,2,0,0.6392373243967692
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,2,2048,4,0,0.3490560054779053
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,2,2048,4,0,0.35395201047261554
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,2,2048,8,0,0.2104319930076599
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,2,2048,8,0,0.2092640002568563
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,2,2048,32,0,0.16531733671824136
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,2,2048,16,0,0.13338133692741394
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,2,2048,16,0,0.164000004529953
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,2,2048,32,0,0.1673439939816793
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,2,2048,64,0,0.16615999738375345
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,2,2048,128,0,0.16949333747227988
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,2,2048,64,0,0.8377333482106527
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,2,2048,128,0,0.4471413294474284
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,4,2048,2,0,1.2191147009531658
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,4,2048,2,0,1.206544001897176
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,4,2048,4,0,0.6459039847056071
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,4,2048,1,0,2.3531200091044107
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,4,2048,4,0,0.6473333438237509
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,4,2048,8,0,0.3632906675338745
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,4,2048,1,0,2.3537920316060386
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,4,2048,8,0,0.36746132373809814
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,4,2048,16,0,0.21955200036366782
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,4,2048,32,0,0.17478932936986288
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,4,2048,32,0,0.18668800592422485
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,4,2048,16,0,0.22307199239730835
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,4,2048,64,0,0.18658133347829184
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,4,2048,64,0,0.17933867375055948
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,4,2048,128,0,0.18119466304779053
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,4,2048,128,0,0.17417599757512411
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,8,2048,4,0,1.2398026784261067
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,8,2048,2,0,2.3537119229634604
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,8,2048,4,0,1.2392693360646565
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,8,2048,8,0,0.6718026796976725
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,8,2048,2,0,2.3581226666768393
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,8,2048,1,0,4.616623878479004
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,8,2048,1,0,4.613125483194987
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,8,2048,8,0,0.6737919648488363
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,8,2048,16,0,0.3872106472651164
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,8,2048,16,0,0.3915199836095174
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,8,2048,32,0,0.2633013327916463
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,8,2048,32,0,0.2629280090332031
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,8,2048,64,0,0.18383999665578207
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,8,2048,64,0,0.2170026699701945
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,8,2048,128,0,0.17695466677347818
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,8,2048,128,0,0.14265599846839905
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,16,2048,8,0,1.2886613210042317
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,16,2048,4,0,2.4062347412109375
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,16,2048,2,0,4.638496081034343
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,16,2048,4,0,2.398789405822754
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,16,2048,8,0,1.2874399820963542
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,16,2048,16,0,0.7253386974334717
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,16,2048,2,0,4.658351898193359
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,16,2048,16,0,0.7243359883626302
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,16,2048,32,0,0.4683733383814494
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,16,2048,1,0,9.12068239847819
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,16,2048,32,0,0.46861334641774494
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,16,2048,1,0,9.129482905069986
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,16,2048,64,0,0.3168639938036601
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,16,2048,64,0,0.3229440053304036
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,16,2048,128,0,0.23923200368881226
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,16,2048,128,0,0.2383520007133484
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,1,3072,2,0,0.5625386635462443
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,1,3072,1,0,1.0515519777933757
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,1,3072,4,0,0.3201226592063904
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,1,3072,1,0,1.0423786640167236
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,1,3072,2,0,0.5642559925715128
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,1,3072,4,0,0.32365866502126056
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,1,3072,8,0,0.19767999649047852
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,1,3072,8,0,0.2003893256187439
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,1,3072,16,0,0.16299733519554138
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,1,3072,16,0,0.16378133495648703
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,1,3072,32,0,0.1729653278986613
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,1,3072,128,0,0.16846400499343872
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,1,3072,32,0,0.17084266742070517
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,1,3072,64,0,0.1688800056775411
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,1,3072,64,0,0.1632266640663147
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,1,3072,128,0,0.16606400410334268
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,2,3072,1,0,2.04748805363973
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,2,3072,2,0,1.0585013230641682
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,2,3072,2,0,1.0684800148010254
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,2,3072,4,0,0.571066657702128
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,2,3072,1,0,2.032309373219808
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,2,3072,8,0,0.3303520083427429
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,2,3072,4,0,0.5702613194783529
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,2,3072,8,0,0.3286133408546448
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,2,3072,16,0,0.20300267140070596
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,2,3072,16,0,0.20182400941848755
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,2,3072,64,0,0.14405333002408346
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,2,3072,32,0,0.1467519998550415
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,2,3072,32,0,0.20291733741760254
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,2,3072,64,0,0.1656000018119812
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,2,3072,128,0,0.1686240037282308
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,2,3072,128,0,0.17949867248535156
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,4,3072,2,0,2.0791519482930503
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,4,3072,4,0,1.0786506334940593
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,4,3072,1,0,4.011040051778157
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,4,3072,4,0,1.078986644744873
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,4,3072,8,0,0.5905706485112509
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,4,3072,2,0,2.0550400416056314
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,4,3072,1,0,3.9996105829874673
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,4,3072,8,0,0.5937013228734335
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,4,3072,16,0,0.34761067231496173
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,4,3072,16,0,0.3433973391850789
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,4,3072,32,0,0.23567465941111246
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,4,3072,64,0,0.1787359913190206
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,4,3072,32,0,0.2371413310368856
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,4,3072,64,0,0.16882665952046713
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,4,3072,128,0,0.17880533138910928
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,4,3072,128,0,0.18076266845067343
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,8,3072,4,0,2.085360050201416
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,8,3072,2,0,4.035861333211263
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,8,3072,8,0,1.117146650950114
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,8,3072,4,0,2.101861317952474
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,8,3072,2,0,4.038079897562663
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,8,3072,8,0,1.1493173440297444
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,8,3072,16,0,0.631770650545756
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,8,3072,16,0,0.6297653516133627
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,8,3072,1,0,7.899232228597005
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,8,3072,32,0,0.4129386742909749
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,8,3072,32,0,0.41095999876658124
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,8,3072,1,0,7.922362645467122
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,8,3072,64,0,0.2731519937515259
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,8,3072,64,0,0.27670933802922565
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,8,3072,128,0,0.2067413330078125
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,8,3072,128,0,0.20560532808303833
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,1,4096,1,0,1.5704906781514485
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,1,4096,2,0,0.8274719715118408
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,1,4096,2,0,0.8244000275929769
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,1,4096,4,0,0.45748265584309894
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,1,4096,1,0,1.613983949025472
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,1,4096,8,0,0.2682080070177714
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,1,4096,4,0,0.4551839828491211
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,1,4096,8,0,0.2733813325564067
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,1,4096,16,0,0.17978666226069132
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,1,4096,32,0,0.16491732994715372
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,1,4096,32,0,0.3485226631164551
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,1,4096,64,0,0.1705013314882914
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,1,4096,16,0,0.17811199029286703
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,1,4096,64,0,0.16713066895802817
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,1,4096,128,0,0.16481066743532816
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,1,4096,128,0,0.15103466312090555
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,2,4096,2,0,1.606218655904134
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,2,4096,1,0,3.060175895690918
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,2,4096,2,0,1.5765172640482585
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,2,4096,8,0,0.4699999888737996
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,2,4096,4,0,0.8440053462982178
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,2,4096,8,0,0.46847466627756756
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,2,4096,1,0,3.073333422342936
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,2,4096,16,0,0.282858669757843
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,2,4096,4,0,0.8365973631540934
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,2,4096,16,0,0.28059200445810956
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,2,4096,32,0,0.1852746605873108
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,2,4096,32,0,0.18826133012771606
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,2,4096,64,0,0.17568532625834146
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,2,4096,64,0,0.1647040049235026
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,2,4096,128,0,0.16409599781036377
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,2,4096,128,0,0.15839466452598572
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,4,4096,8,0,0.860309362411499
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,4,4096,4,0,1.6015413602193196
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,4,4096,2,0,3.0937118530273438
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,4,4096,4,0,1.6408799489339192
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,4,4096,8,0,0.8654507001241049
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,4,4096,2,0,3.0922667185465493
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,4,4096,16,0,0.49609601497650146
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,4,4096,16,0,0.48957868417104083
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,4,4096,1,0,6.070549647013347
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,4,4096,1,0,6.04802131652832
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,4,4096,32,0,0.3035999933878581
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,4,4096,64,0,0.2204586664835612
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,4,4096,32,0,0.3110186656316121
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,4,4096,64,0,0.22388267517089844
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,4,4096,128,0,0.1679146687189738
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,4,4096,128,0,0.17227200667063394
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,8,4096,8,0,1.65720001856486
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,8,4096,8,0,1.6525440216064453
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,8,4096,4,0,3.1410719553629556
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,8,4096,4,0,3.1561387379964194
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,8,4096,2,0,6.066527684529622
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,8,4096,16,0,0.913487990697225
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,8,4096,2,0,6.074805577596028
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,8,4096,16,0,0.912384033203125
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,8,4096,32,0,0.5506240129470825
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,8,4096,32,0,0.5460160175959269
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,8,4096,64,0,0.37681599458058673
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,8,4096,128,0,0.27689067522684735
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,8,4096,64,0,0.3792213201522827
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,8,4096,128,0,0.27666133642196655
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,8,4096,1,0,11.925567626953125
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,8,4096,1,0,11.965941111246744
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,1,6144,2,0,1.480240027109782
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,1,6144,4,0,0.7943306763966879
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,1,6144,2,0,1.4755412737528484
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,1,6144,1,0,2.8559306462605796
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,1,6144,4,0,0.7927947044372559
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,1,6144,8,0,0.44515732924143475
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,1,6144,1,0,2.905130704243978
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,1,6144,8,0,0.44686933358510333
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,1,6144,16,0,0.273637334505717
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,1,6144,16,0,0.277893324693044
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,1,6144,32,0,0.1869653264681498
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,1,6144,32,0,0.1973066727320353
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,1,6144,64,0,0.17422932386398315
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,1,6144,64,0,0.18001067638397217
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,1,6144,128,0,0.17914666732152304
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,1,6144,128,0,0.16777066389719644
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,2,6144,2,0,2.879221280415853
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,2,6144,4,0,1.497754732767741
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,2,6144,2,0,2.8849919637044272
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,2,6144,4,0,1.5006720225016277
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,2,6144,8,0,0.8123839696248373
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,2,6144,1,0,5.623802820841472
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,2,6144,1,0,5.613141377766927
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,2,6144,8,0,0.8128960132598877
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,2,6144,16,0,0.4692106644312541
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,2,6144,16,0,0.4675840139389038
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,2,6144,32,0,0.29264533519744873
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,2,6144,32,0,0.2956746617952983
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,2,6144,64,0,0.21708800395329794
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,2,6144,64,0,0.21793067455291748
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,2,6144,128,0,0.2293226718902588
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,2,6144,128,0,0.15727999806404114
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,4,6144,8,0,1.5416480700174968
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,4,6144,4,0,2.9039999643961587
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,4,6144,2,0,5.6597442626953125
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,4,6144,8,0,1.5420746803283691
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,4,6144,4,0,2.922661463419596
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,4,6144,16,0,0.851482629776001
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,4,6144,2,0,5.651125590006511
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,4,6144,16,0,0.8477439880371094
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,4,6144,32,0,0.5072319904963175
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,4,6144,32,0,0.5111306508382162
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,4,6144,64,0,0.3481493393580119
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,4,6144,64,0,0.3497813145319621
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,4,6144,128,0,0.25411732991536456
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,4,6144,128,0,0.25354133049647015
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,4,6144,1,0,11.14245351155599
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,4,6144,1,0,11.175999959309896
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,1,8192,2,0,2.3462346394856772
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,1,8192,4,0,1.2191840012868245
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,1,8192,8,0,0.6730186939239502
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,1,8192,4,0,1.220031976699829
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,1,8192,2,0,2.328986644744873
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,1,8192,8,0,0.6791573365529379
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,1,8192,16,0,0.3975199858347575
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,1,8192,1,0,4.522373199462891
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,1,8192,16,0,0.3982400099436442
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,1,8192,32,0,0.3052000006039937
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,1,8192,1,0,4.542474746704102
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,1,8192,32,0,0.2593280076980591
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,1,8192,64,0,0.17974932988484701
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,1,8192,64,0,0.1792479952176412
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,1,8192,128,0,0.17350399494171143
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,1,8192,128,0,0.17385600010553995
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,2,8192,8,0,1.2499039967854817
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,2,8192,4,0,2.3796159426371255
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,2,8192,8,0,1.2445333003997803
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,2,8192,2,0,4.538341204325358
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,2,8192,2,0,4.548720041910808
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,2,8192,4,0,2.3535359700520835
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,2,8192,16,0,0.6993920008341471
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,2,8192,16,0,0.7120693524678549
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,2,8192,32,0,0.42502399285634357
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,2,8192,32,0,0.42599467436472577
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,2,8192,64,0,0.2855253418286641
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,2,8192,64,0,0.2856373389561971
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,2,8192,128,0,0.21024000644683838
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,2,8192,1,0,9.027333577473959
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,2,8192,1,0,8.967450459798178
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,2,8192,128,0,0.2133973240852356
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,4,8192,8,0,2.4251999855041504
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,4,8192,8,0,2.3886027336120605
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,4,8192,4,0,4.607743899027507
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,4,8192,4,0,4.580016136169434
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,4,8192,16,0,1.3046560287475586
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,4,8192,16,0,1.3019893169403076
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,4,8192,2,0,8.965904235839844
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,4,8192,32,0,0.7520586649576823
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,4,8192,32,0,0.7519946893056234
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,4,8192,2,0,8.979290644327799
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,4,8192,64,0,0.476800004641215
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,4,8192,64,0,0.47470935185750324
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,4,8192,128,0,0.354751984278361
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,4,8192,128,0,0.37562131881713867
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,4,8192,1,0,17.99060821533203
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,4,8192,1,0,17.761450449625652
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,1,10240,4,0,1.7315413157145183
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,1,10240,4,0,1.7505440711975098
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,1,10240,2,0,3.335482597351074
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,1,10240,2,0,3.3374932607014975
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,1,10240,8,0,0.946565310160319
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,1,10240,8,0,0.9405386447906494
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,1,10240,1,0,6.581701278686523
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,1,10240,16,0,0.5422186851501465
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,1,10240,16,0,0.5592373212178549
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,1,10240,1,0,6.557055791219075
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,1,10240,32,0,0.33876268068949383
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,1,10240,32,0,0.34279998143513996
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,1,10240,64,0,0.23839465777079263
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,1,10240,64,0,0.23854400714238486
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,1,10240,128,0,0.1747200091679891
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,1,10240,128,0,0.17463467518488565
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,2,10240,8,0,1.7836586634318035
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,2,10240,4,0,3.3708372116088867
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,2,10240,4,0,3.3735733032226562
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,2,10240,2,0,6.574330647786458
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,2,10240,16,0,0.9753440221150717
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,2,10240,8,0,1.7891359329223633
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,2,10240,2,0,6.603562672932942
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,2,10240,16,0,0.9712426662445068
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,2,10240,32,0,0.5884639819463094
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,2,10240,32,0,0.581930677096049
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,2,10240,64,0,0.3720159928003947
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,2,10240,128,0,0.2797440091768901
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,2,10240,64,0,0.38073599338531494
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,2,10240,128,0,0.28294400374094647
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,2,10240,1,0,13.08965810139974
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,2,10240,1,0,13.207599639892578
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,1,12288,8,0,1.257962703704834
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,1,12288,4,0,2.3631466229756675
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,1,12288,4,0,2.351583957672119
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,1,12288,2,0,4.58403205871582
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,1,12288,2,0,4.5475413004557295
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,1,12288,16,0,0.7064266999562582
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,1,12288,8,0,1.2737013498942058
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,1,12288,32,0,0.4322773218154907
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,1,12288,16,0,0.7059786319732666
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,1,12288,1,0,9.22756258646647
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,1,12288,32,0,0.6694613297780355
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,1,12288,64,0,0.2961546579996745
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,1,12288,1,0,9.220346450805664
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,1,12288,64,0,0.29527467489242554
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,1,12288,128,0,0.20963199933369955
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,1,12288,128,0,0.812058687210083
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,2,12288,8,0,2.4033333460489907
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,2,12288,8,0,2.3862454096476235
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,2,12288,4,0,4.612085342407227
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,2,12288,4,0,4.603466669718425
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,2,12288,16,0,1.3062400023142497
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,2,12288,2,0,8.9988161722819
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,2,12288,16,0,1.2980480194091797
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,2,12288,32,0,0.7526986598968506
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,2,12288,32,0,0.7477280298868815
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,2,12288,2,0,8.997061411539713
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,2,12288,64,0,0.4752800067265828
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,2,12288,64,0,0.49232534567515057
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,2,12288,128,0,0.3450560172398885
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,2,12288,128,0,0.343120018641154
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,2,12288,1,0,18.31820805867513
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,2,12288,1,0,18.1278559366862
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,1,16384,4,0,3.9005066553751626
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,1,16384,4,0,3.8827358881632485
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,1,16384,8,0,2.0567092895507812
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,1,16384,8,0,2.0262667338053384
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,1,16384,2,0,7.696725209554036
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,1,16384,16,0,1.11517333984375
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,1,16384,16,0,1.1128586928049724
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,1,16384,2,0,7.678874969482422
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,1,16384,32,0,0.6484479904174805
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,1,16384,32,0,0.6505066553751627
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,1,16384,64,0,0.43281598885854083
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,1,16384,64,0,0.4516213337580363
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,1,16384,128,0,0.29628799359003705
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,1,16384,128,0,0.3044480085372925
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,1,16384,1,0,15.31381352742513
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,1,16384,1,0,15.157087961832682
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,2,16384,8,0,3.9323838551839194
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,2,16384,8,0,3.903631846110026
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,2,16384,4,0,7.674479802449544
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,2,16384,4,0,7.564943949381511
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,2,16384,16,0,2.0702667236328125
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,2,16384,16,0,2.0780159632364907
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,2,16384,32,0,1.1880906422932942
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,2,16384,32,0,1.1567946275075276
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,2,16384,64,0,0.7049492994944254
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,2,16384,64,0,0.704207976659139
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,2,16384,128,0,0.4826666514078776
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,2,16384,2,0,15.343482971191406
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,2,16384,128,0,0.48337066173553467
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,2,16384,2,0,15.587696075439453
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,2,16384,1,0,30.900367736816406
VLLM,0.12.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,2,16384,1,0,31.010528564453125
