framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,1,0.10886399944623311
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,1,0.11953066786130269
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,1,0.221343994140625
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,1,0.16576533516248068
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,1,0.16180800398190817
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,1,0.21899733940760294
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,1,0.16476266582806906
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,1,0.2216906746228536
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,1,0.1628373364607493
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,1,0.20720533529917398
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,1,0.16621333360671997
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,1,0.15052266915639242
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,1,0.2867786685625712
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,1,0.19826134045918783
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,1,0.11860799789428711
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,1,0.21289600928624472
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,1,0.11396800478299458
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,1,0.22446399927139282
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,1,0.16064533591270447
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,1,0.2207039992014567
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,1,0.204912006855011
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,1,0.22463999191919962
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,1,0.24161599079767862
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,1,0.16420267025629678
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,1,0.21251734097798666
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,1,0.12870400150616965
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,1,0.2201333244641622
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,1,0.16911999384562174
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,1,0.11173866192499797
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,1,0.2353973388671875
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,1,0.16474133729934692
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,1,0.10859733819961548
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,1,0.16722132762273154
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,1,0.20457599560419717
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,1,0.11963733037312825
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,1,0.17358400424321493
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,1,0.1602720022201538
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,1,0.23226133982340494
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,1,0.16074132919311523
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,1,0.23522132635116577
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,1,0.23206400871276855
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,1,0.20736533403396606
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,1,0.12967466314633688
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,1,0.15133333206176758
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,1,0.229418675104777
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,1,0.22617600361506143
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,1,0.1872053345044454
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,1,0.11932800213495891
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,1,0.15873600045839945
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,1,0.23055466016133627
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,1,0.15971733132998148
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,1,0.2298240065574646
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,1,0.1661066710948944
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,1,0.1900320053100586
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,1,0.2233546574910482
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,1,0.13846932848294577
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,1,0.17725332578023276
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,1,0.229477326075236
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,1,0.31357866525650024
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,1,0.25620800256729126
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,1,0.17200533548990884
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,1,0.16357866923014322
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,1,0.1807146668434143
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,1,0.19901333252588907
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,1,0.17011199394861856
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,1,0.2265066703160604
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,1,0.16425066192944845
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,1,0.16643200318018594
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,1,0.22523200511932373
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,1,0.23317333062489828
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,1,0.18500266472498575
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,1,0.17887999614079794
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,1,0.136053333679835
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,1,0.2325920065244039
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,1,0.22833067178726196
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,1,0.16896533966064453
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,1,0.13023466865221658
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,1,0.17815999190012613
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,1,0.16085333625475565
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,1,0.214464008808136
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,1,0.1676373283068339
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,1,0.1669173240661621
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,1,0.11897599697113037
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,1,0.2256586750348409
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,1,0.16588800152142844
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,1,0.09859733780225118
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,1,0.1834826668103536
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,1,0.23097066084543863
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,1,0.13250666856765747
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,1,0.2327679991722107
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,1,0.25362666447957355
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,1,0.16124266386032104
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,1,0.16620799899101257
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,1,0.12686933080355325
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,1,0.23493333657582602
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,1,0.36564799149831134
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,1,0.10270399848620097
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,1,0.10115200281143188
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,1,0.1967946688334147
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,1,0.23230934143066406
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,1,0.16198399662971497
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,1,0.18746666113535562
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,1,0.1578933298587799
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,1,0.14808000127474466
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,1,0.13238400220870972
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,1,0.1922773321469625
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,1,0.1581546664237976
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,1,0.18157333135604858
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,1,0.16401599844296774
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,1,0.2365600069363912
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,1,0.1657813290754954
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,1,0.4019840161005656
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,1,0.17336533466974893
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,1,0.22828799486160278
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,1,0.23014932870864868
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,1,0.16343466440836588
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,1,0.16661866505940756
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,1,0.1690559983253479
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,1,0.09946133693059285
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,1,0.23036799828211466
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,1,0.15076800187428793
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,1,0.20895467201868692
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,1,0.17068266868591309
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,1,0.1773759921391805
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,1,0.13220799962679544
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,1,0.16570666432380676
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,1,0.12809600432713827
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,1,0.1620213290055593
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,1,0.1869866649309794
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,1,0.1362506647904714
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,1,0.2548000017801921
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,1,0.2412266731262207
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,1,0.10241066416104634
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,1,0.16050666570663452
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,1,0.1051573355992635
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,1,0.1816106637318929
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,1,0.13129066427548727
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,1,0.236735999584198
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,1,0.16040533781051636
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,1,0.24050132433573404
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,1,0.17014400164286295
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,1,0.29209067424138385
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,1,0.2319413423538208
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,1,0.16673066218694052
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,1,0.3378346761067708
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,1,0.4681546688079834
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,1,0.2087519963582357
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,1,0.19011733929316202
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,1,0.26785600185394287
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,1,0.18131200472513834
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,1,0.13221866885821024
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,1,0.1904426614443461
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,1,0.16636266311009726
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,1,0.2382240096728007
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,1,0.1649066706498464
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,1,0.1909653345743815
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,1,0.1644319991270701
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,1,0.14714133739471436
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,1,0.21041599909464517
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,1,0.1670773426691691
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,1,0.8799786567687988
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,1,0.5195626815160116
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,1,0.3338559865951538
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,1,0.18594666322072348
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,1,0.32417599360148114
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,1,0.4583359956741333
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,1,0.18306666612625122
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,1,0.15875200430552164
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,1,0.1696000099182129
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,1,0.19951999187469482
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,1,0.23908267418543497
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,1,0.17045332988103232
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,1,0.16936000188191733
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,1,0.17302932341893515
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,1,0.2179786761601766
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,1,0.23821866512298584
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,3,0.1381493310133616
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,3,0.12069867054621379
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,3,0.1579200029373169
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,3,0.2242506742477417
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,3,0.15380799770355225
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,3,0.15757333238919577
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,3,0.218666672706604
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,3,0.878111998240153
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,3,0.1574560006459554
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,3,0.15833600362141928
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,3,0.21669334173202515
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,3,0.15828800201416016
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,3,0.22322134176890054
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,3,0.1956160068511963
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,3,0.2685226599375407
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,3,0.1762133240699768
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,3,0.12921599547068277
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,3,0.15030399958292642
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,3,0.15895467003186545
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,3,0.2421919902165731
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,3,0.16074132919311523
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,3,0.16351466377576193
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,3,0.22945600748062134
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,3,0.17709867159525552
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,3,0.2225333253542582
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,3,0.16353600223859152
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,3,0.16196266810099283
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,3,0.1809013287226359
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,3,0.15896000464757284
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,3,0.1682986617088318
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,3,0.1604693333307902
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,3,0.19979733228683472
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,3,0.3017973303794861
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,3,0.14803733428319296
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,3,0.16369600097338358
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,3,0.22822399934132895
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,3,0.12635733683904013
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,3,0.22672533988952637
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,3,0.16602133711179098
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,3,0.23783999681472778
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,3,0.12663466731707254
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,3,0.23114667336146036
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,3,0.10964799920717876
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,3,0.1819253365198771
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,3,0.12306666374206543
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,3,0.18126400311787924
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,3,0.16182933251063028
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,3,0.20109333594640097
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,3,0.2847786744435628
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,3,0.16127999623616537
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,3,0.22483199834823608
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,3,0.16707199811935425
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,3,0.218831996122996
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,3,0.19021866718928018
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,3,0.16220266620318094
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,3,0.1609226663907369
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,3,0.2316640019416809
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,3,0.23082667589187622
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,3,0.164682666460673
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,3,0.2352959911028544
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,3,0.1630400021870931
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,3,0.15973866979281107
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,3,0.2283946673075358
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,3,0.15502933661142984
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,3,0.16355733076731363
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,3,0.2032853364944458
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,3,0.17057599623998007
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,3,0.16365866859753928
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,3,0.17505067586898804
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,3,0.12477866808573405
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,3,0.21954667568206787
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,3,0.22054932514826456
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,3,0.1705333391825358
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,3,0.228383998076121
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,3,0.38408533732096356
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,3,0.27139200766881305
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,3,0.22428800662358603
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,3,0.16260799765586853
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,3,0.17046932379404703
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,3,0.20517333348592123
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,3,0.24817599852879843
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,3,0.1434719959894816
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,3,0.13011733690897623
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,3,0.23753066857655844
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,3,0.16434666514396667
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,3,0.20534400145212808
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,3,0.21966399749120077
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,3,0.16445866227149963
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,3,0.16756800810496011
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,3,0.16168000300725302
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,3,0.22770132621129355
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,3,0.15852800011634827
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,3,0.24259734153747559
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,3,0.20893865823745728
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,3,0.20468266805013022
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,3,0.1292639970779419
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,3,0.16160533825556436
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,3,0.12336533268292744
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,3,0.18336532513300577
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,3,0.21094399690628052
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,3,0.16668800512949625
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,3,0.2237280011177063
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,3,0.16359999775886536
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,3,0.18337599436442056
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,3,0.10345066587130229
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,3,0.1262986660003662
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,3,0.19061332941055298
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,3,0.23089067141215006
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,3,0.23483200867970785
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,3,0.16332800189654031
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,3,0.12812800208727518
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,3,0.2087413271268209
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,3,0.15921066204706827
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,3,0.1699840029080709
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,3,0.2371786634127299
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,3,0.16680532693862915
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,3,0.22742400566736856
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,3,0.1976426641146342
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,3,0.1599573294321696
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,3,0.18499199549357095
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,3,0.15019733707110086
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,3,0.16230400403340658
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,3,0.2411200006802877
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,3,0.23245332638422647
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,3,0.1334826648235321
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,3,0.16244266430536905
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,3,0.24344533681869507
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,3,0.1657919983069102
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,3,0.15852800011634827
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,3,0.25482134024302167
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,3,0.16658133268356323
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,3,0.16263467073440552
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,3,0.43835731347401935
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,3,0.24165334304173788
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,3,0.161189337571462
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,3,0.19477866093317667
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,3,0.13091199596722922
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,3,0.23028800884882608
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,3,0.16266133387883505
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,3,0.2305813431739807
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,3,0.11616533001263936
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,3,0.1881706714630127
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,3,0.1614346702893575
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,3,0.16487466295560202
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,3,0.19217065970102945
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,3,0.46700799465179443
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,3,0.26729599634806317
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,3,0.3380693197250366
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,3,0.13302933176358542
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,3,0.22847465674082437
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,3,0.16300800442695618
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,3,0.12693867087364197
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,3,0.20492267608642578
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,3,0.16421332955360413
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,3,0.16953599452972412
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,3,0.22911999622980753
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,3,0.2292799949645996
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,3,0.1808799902598063
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,3,0.16194666425387064
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,3,0.1597546637058258
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,3,0.33478399117787677
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,3,0.18817599614461264
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,3,0.17525867621103922
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,3,0.8785920143127441
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,3,0.5136746565500895
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,3,0.3220213254292806
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,3,0.14662933349609375
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,3,0.2243786652882894
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,3,0.2425653338432312
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,3,0.16876266400019327
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,3,0.19490132729212442
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,3,0.1625333329041799
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,7,0.13004799683888754
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,3,0.24012267589569092
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,7,0.2311306595802307
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,3,0.17614932854970297
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,7,0.1625599960486094
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,3,0.17047999302546182
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,7,0.22171199321746826
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,3,0.1651946703592936
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,7,0.16405866543451944
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,7,0.2190399964650472
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,7,0.2502400080362956
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,7,0.13275733590126038
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,7,0.21124267578125
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,7,0.207914670308431
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,7,0.16315733393033346
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,7,0.21630932887395224
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,7,0.16263467073440552
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,7,0.18812266985575357
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,7,0.15019200245539346
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,7,0.15557866295178732
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,7,0.1602133313814799
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,7,0.4722506602605184
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,7,0.1602186659971873
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,7,0.22805333137512207
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,7,0.2266826629638672
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,7,0.227290670077006
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,7,0.16200533509254456
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,7,0.1602773368358612
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,7,0.16524266203244528
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,7,0.16040533781051636
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,7,0.1811413367589315
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,7,0.22292266289393106
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,7,0.10427199800809224
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,7,0.17617599169413248
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,7,0.15821866194407144
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,7,0.2041920026143392
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,7,0.14999467134475708
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,7,0.12662933270136514
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,7,0.13078932960828146
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,7,0.16005333264668783
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,7,0.5797866582870483
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,7,0.20182400941848755
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,7,0.16396266222000122
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,7,0.2286720077196757
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,7,0.12786133090655008
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,7,0.15778133273124695
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,7,0.2348533272743225
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,7,0.16192000110944113
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,7,0.22686932484308878
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,7,0.2156053384145101
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,7,0.16305599610010782
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,7,0.20663466056187949
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,7,0.12878400087356567
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,7,0.1623360017935435
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,7,0.23484265804290771
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,7,0.16435733437538147
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,7,0.22573866446812949
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,7,0.2140000065167745
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,7,0.23383466402689615
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,7,0.16220266620318094
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,7,0.16108266512552896
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,7,0.21104532480239868
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,7,0.7642186482747396
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,7,0.21994666258494058
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,7,0.16151466965675354
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,7,0.23225067059199014
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,7,0.11988266309102376
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,7,0.20586667458216348
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,7,0.16505600015322366
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,7,0.8298186461130778
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,7,0.17090133825937906
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,7,0.22324800491333008
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,7,0.17381866772969565
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,7,0.185263991355896
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,7,0.16012266278266907
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,7,0.22866666316986084
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,7,0.12477333347002666
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,7,0.15953600406646729
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,7,0.2341759999593099
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,7,0.22789333264033
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,7,0.16075733304023743
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,7,0.29387199878692627
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,7,0.1630400021870931
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,7,0.21296000480651855
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,7,0.1798293391863505
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,7,0.5582613150278727
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,7,0.1483573317527771
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,7,0.22778666019439697
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,7,0.22304532925287882
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,7,0.16705065965652466
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,7,0.12256000439325969
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,7,0.20853867133458456
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,7,0.1313920021057129
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,7,0.22552533944447836
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,7,0.1629706621170044
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,7,0.22016000747680664
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,7,0.16299733519554138
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,7,0.23408534129460654
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,7,0.20816532770792642
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,7,0.1629706621170044
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,7,0.14060266812642416
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,7,0.2555413246154785
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,7,0.11620266238848369
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,7,0.1665066679318746
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,7,0.2092693249384562
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,7,0.16201600432395935
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,7,0.23469332853953043
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,7,0.17838933070500693
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,7,0.16396799683570862
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,7,0.22483734289805093
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,7,0.16485333442687988
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,7,0.23557867606480917
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,7,0.1627840002377828
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,7,0.22696532805760702
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,7,0.13142933448155722
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,7,0.15853333473205566
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,7,0.10504000385602315
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,7,0.16449600458145142
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,7,0.24659732977549234
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,7,0.23599465688069662
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,7,0.20873600244522095
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,7,0.136543999115626
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,7,0.1835520068804423
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,7,0.18595200777053833
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,7,0.13135466972986856
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,7,0.2217280069986979
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,7,0.16410133242607117
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,7,0.22131733099619547
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,7,0.16343466440836588
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,7,0.18648000558217367
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,7,0.15566399693489075
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,7,0.16873067617416382
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,7,0.1738026738166809
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,7,0.1632213294506073
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,7,0.2514773408571879
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,7,0.23010132710138956
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,7,0.1649066706498464
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,7,0.1609226663907369
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,7,0.2347360054651896
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,7,0.18898133436838785
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,7,0.16659733653068542
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,7,0.22813334067662558
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,7,0.15524267156918845
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,7,0.1778293251991272
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,7,0.13201066851615906
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,7,0.22374399503072104
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,7,0.16410666704177856
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,7,0.16369066635767618
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,7,0.33795201778411865
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,7,0.19116799036661783
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,7,0.46528534094492596
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,7,0.2667733430862427
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,7,0.13788800438245138
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,7,0.22791999578475952
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,7,0.1623360017935435
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,7,0.18382400274276733
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,7,0.16300800442695618
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,7,0.23265600204467773
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,7,0.15889066457748413
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,7,0.23169066508611044
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,7,0.16784000396728516
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,7,0.15702933073043823
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,7,0.1798186699549357
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,7,0.20348799228668213
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,7,0.3279306689898173
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,7,0.17430400848388672
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,7,0.1872053345044454
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,7,0.32359466950098675
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,7,0.8786400159200033
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,7,0.5183413426081339
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,7,0.13808000087738037
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,7,0.22075732549031576
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,7,0.16754666964213052
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,7,0.2030400037765503
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,7,0.13294933239618936
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,7,0.2300586700439453
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,15,0.09816533327102661
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,7,0.16854933897654215
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,15,0.15893866618474325
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,7,0.24344533681869507
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,15,0.2174826661745707
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,15,0.21548799673716226
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,15,0.16116266449292502
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,15,0.218666672706604
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,15,0.16766933600107828
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,7,0.17990932861963907
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,15,0.21664534012476602
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,15,0.17620799938837686
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,15,0.162063995997111
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,15,0.16064533591270447
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,15,0.21670933564503989
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,15,0.1634666621685028
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,15,0.17449599504470825
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,15,0.15897066394488016
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,15,0.21076265970865884
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,7,0.2143626610438029
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,15,0.15685333808263144
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,15,0.16169066230456033
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,15,0.22893333435058594
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,15,0.126720001300176
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,15,0.22672533988952637
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,15,0.20338666439056396
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,15,0.15888532996177673
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,15,0.24687999486923218
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,15,0.23145065704981485
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,15,0.15591999888420105
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,15,0.1599146624406179
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,15,0.23134400447209677
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,15,0.16707199811935425
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,15,0.21939200162887573
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,15,0.16546666622161865
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,15,0.2432266672452291
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,15,0.3172373374303182
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,15,0.1585760017236074
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,15,0.2268106738726298
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,15,0.222378671169281
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,15,0.15924800435702005
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,15,0.16403200229008993
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,15,0.15948266784350076
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,15,0.21902400255203247
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,15,0.22133866945902506
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,15,0.22712000211079916
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,15,0.16695467631022134
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,15,0.22750399510065714
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,15,0.13938132921854654
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,15,0.19818667570749918
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,15,0.16061333815256754
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,15,0.20010666052500406
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,15,0.18754667043685913
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,15,0.16344533363978067
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,15,0.2285333275794983
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,15,0.22873600323994955
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,15,0.15862933794657388
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,15,0.22381865978240967
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,15,0.1641386648019155
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,15,0.1206666628519694
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,15,0.23448532819747925
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,15,0.22140266497929892
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,15,0.19093332688013712
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,15,0.1267039974530538
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,15,1.193562666575114
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,15,0.229477326075236
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,15,0.20041600863138834
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,15,0.15917866428693137
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,15,0.1600160002708435
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,15,0.22362132867177328
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,15,0.12706133723258972
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,15,0.23480532566706339
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,15,0.43665067354838055
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,15,0.1925813357035319
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,15,0.1299626628557841
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,15,0.22782933712005615
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,15,0.16077333688735962
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,15,0.2261013388633728
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,15,0.1330773333708445
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,15,0.2224959929784139
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,15,0.16528000434239706
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,15,0.23245332638422647
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,15,0.15594133734703064
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,15,0.20158400138219199
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,15,0.12006933490435283
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,15,0.1421440045038859
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,15,0.3232213258743286
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,15,0.24974934260050455
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,15,0.21864000956217447
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,15,0.1312320033709208
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,15,0.1588533322016398
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,15,0.18381865819295248
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,15,0.1653279960155487
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,15,0.9026400248209635
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,15,0.16416000326474509
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,15,0.1723466714223226
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,15,0.15820800264676413
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,15,0.22210667530695596
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,15,0.1665440003077189
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,15,0.17945067087809244
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,15,0.16990399360656738
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,15,0.1620746652285258
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,15,0.23444799582163492
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,15,0.19409066438674927
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,15,0.2287999987602234
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,15,0.15923200050989786
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,15,0.2170026699701945
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,15,0.17763733863830566
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,15,0.1295253336429596
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,15,0.1272586683432261
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,15,0.24241065979003906
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,15,0.22773865858713785
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,15,0.16194132963816324
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,15,0.15371200442314148
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,15,0.16135467092196146
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,15,0.16865599155426025
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,15,0.22654932737350464
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,15,0.15268799662590027
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,15,0.15995200475056967
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,15,0.23009600241978964
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,15,0.16037866473197937
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,15,0.22147732973098755
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,15,0.13335999846458435
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,15,0.22525332371393839
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,15,0.16331199804941812
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,15,0.9029599825541178
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,15,0.1621226668357849
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,15,0.23062400023142496
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,15,0.19087467590967813
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,15,0.16133866707483926
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,15,0.15877866744995117
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,15,0.16977600256601968
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,15,0.1649440030256907
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,15,0.18651199340820312
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,15,0.25253333648045856
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,15,0.214954674243927
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,15,0.16434133052825928
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,15,0.1869759956995646
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,15,0.1609653333822886
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,15,0.19478933016459146
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,15,0.16300266981124878
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,15,0.21754666169484457
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,15,0.16691199938456217
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,15,0.16205333669980368
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,15,0.16077333688735962
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,15,0.22527466217676798
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,15,0.2329919934272766
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,15,0.20702399810155234
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,15,0.3375146786371867
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,15,0.525050679842631
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,15,0.26477332909901935
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,15,0.1725013256072998
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,15,0.4561493396759033
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,15,0.22728000084559122
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,15,0.1660533348719279
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,15,0.2397866646448771
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,15,0.1772480010986328
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,15,0.16159466902414957
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,15,0.16225066781044006
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,15,0.2304426630338033
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,15,0.1641333301862081
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,15,0.16221867005030313
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,15,0.2370026707649231
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,15,0.17378133535385132
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,15,0.32891732454299927
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,15,0.8763573169708252
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,15,0.5102666616439819
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,15,0.1878719925880432
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,15,0.1755839983622233
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,15,0.31036800146102905
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,15,0.16962132851282755
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,15,0.21920533974965414
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,15,0.16713599363962808
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,15,0.22777066628138223
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,15,0.1305333375930786
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,15,0.2403306762377421
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,31,0.10440533359845479
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,15,0.16757865746816
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,31,0.21962666511535645
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,31,0.1578933298587799
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,15,0.23694932460784912
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,15,0.136053333679835
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,15,0.21632534265518188
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,31,0.22098666429519653
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,31,0.1917440096537272
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,31,0.16636799772580466
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,31,0.2140586574872335
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,31,0.1783519983291626
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,31,0.16395200292269388
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,31,0.1590986649195353
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,31,0.21100266774495444
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,31,0.1977226734161377
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,31,0.16445866227149963
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,31,0.21287999550501505
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,31,0.1535306672255198
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,31,0.19596266746520996
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,31,0.11885866522789001
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,31,0.16613866885503134
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,31,0.3296053409576416
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,31,0.22548800706863403
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,31,0.1411946713924408
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,31,0.22513065735499063
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,31,0.10981866717338562
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,31,0.22822932402292886
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,31,0.1611199975013733
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,31,0.22514132658640543
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,31,0.1258080005645752
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,31,0.22754132747650146
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,31,0.15847999850908914
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,31,0.23190933465957642
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,31,0.16150400042533875
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,31,0.20541866620381674
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,31,0.11310933033625285
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,31,0.22594666481018066
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,31,0.23368000984191895
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,31,0.15289599696795145
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,31,0.15891200304031372
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,31,0.20563199122746786
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,31,0.16555733482042947
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,31,0.22527466217676798
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,31,0.1285973290602366
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,31,0.18031466007232666
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,31,0.12824533383051553
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,31,0.22681599855422974
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,31,0.15594133734703064
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,31,0.19556266069412231
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,31,0.16035733620325723
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,31,0.20301334063212076
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,31,0.2338506579399109
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,31,0.1639359990755717
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,31,0.1256533364454905
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,31,0.22745599349339804
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,31,0.16342932979265848
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,31,0.17213332653045654
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,31,0.13289599617322287
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,31,0.16207999984423319
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,31,0.19957866271336874
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,31,0.16239999731381735
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,31,0.1595200002193451
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,31,0.23621867100397745
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,31,0.24191999435424805
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,31,0.16526933511098227
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,31,0.23341333866119385
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,31,0.21236799160639444
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,31,0.16702934106191
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,31,0.18693333864212036
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,31,0.158869336048762
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,31,0.16641599933306375
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,31,0.23147734006245932
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,31,0.22430932521820068
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,31,0.16221333543459573
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,31,0.2254026730855306
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,31,0.16129066546758017
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,31,0.2387146751085917
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,31,0.16055466731389365
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,31,0.16250133514404297
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,31,0.43140268325805664
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,31,0.22685333093007407
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,31,0.16057599584261575
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,31,0.21452800432840982
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,31,0.491706649462382
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,31,0.22252267599105835
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,31,0.22870934009552002
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,31,0.12401599685351054
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,31,0.22987200816472372
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,31,0.16869332393010458
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,31,0.19216533501942953
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,31,0.16236799955368042
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,31,0.22503467400868735
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,31,0.22315732638041177
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,31,0.28060799837112427
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,31,0.25148799022038776
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,31,0.1668213407198588
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,31,0.23172799746195474
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,31,0.154858668645223
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,31,0.20363734165827432
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,31,0.16238933801651
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,31,0.21610132853190103
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,31,0.1567039986451467
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,31,0.2250666618347168
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,31,0.16120533148447672
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,31,0.16657599806785583
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,31,0.885258674621582
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,31,0.2262399991353353
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,31,0.15736533204714456
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,31,0.22007999817530313
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,31,0.16287466883659363
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,31,0.15994667013486227
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,31,0.23384533325831094
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,31,0.16110933820406595
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,31,0.23357333739598593
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,31,0.20599466562271118
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,31,0.2632746696472168
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,31,0.22607467571894327
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,31,0.2258560061454773
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,31,0.16197333733240762
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,31,0.1642453372478485
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,31,0.23187732696533203
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,31,0.15613333384195963
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,31,0.22155199448267618
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,31,0.1655786633491516
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,31,0.2310453255971273
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,31,0.15874666968981424
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,31,0.16580266753832498
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,31,0.2326026757558187
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,31,0.22908800840377808
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,31,0.16036799550056458
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,31,0.20684800545374551
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,31,0.16552533706029257
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,31,0.1623253325621287
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,31,0.25731199979782104
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,31,0.2232746680577596
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,31,0.16566933194796243
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,31,0.1824586590131124
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,31,0.22461867332458496
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,31,0.16101333498954773
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,31,0.16075733304023743
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,31,0.23553599913915
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,31,0.16222932934761047
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,31,0.20332799355189005
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,31,0.16660267114639282
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,31,0.3968799908955892
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,31,0.16400532921155295
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,31,0.1913706660270691
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,31,0.33102933565775555
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,31,0.19353065888086954
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,31,0.4662933349609375
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,31,0.2632586757342021
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,31,0.16570666432380676
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,31,0.1715679963429769
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,31,0.22962667544682822
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,31,0.20636266469955444
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,31,0.11596799890200298
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,31,0.22698666652043661
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,31,0.2299413283665975
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,31,0.16993065675099692
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,31,0.1581760048866272
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,31,0.18492799997329712
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,31,0.16078933080037436
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,31,0.1667626698811849
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,31,0.3357386589050293
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,31,0.1856586734453837
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,31,0.5040640036265055
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,31,0.882858673731486
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,31,0.17279465993245444
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,31,0.17441600561141968
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,31,0.3073386748631795
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,31,0.33077865839004517
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,31,0.16500799854596457
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,31,0.23161600033442178
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,31,0.17061332861582437
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,31,0.16556800405184427
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,63,0.12852266430854797
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,31,0.22619734207789102
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,31,0.17088532447814941
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,63,0.1630453368028005
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,63,0.21938133239746094
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,63,0.1588053305943807
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,63,0.21978666385014853
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,63,0.21143466234207153
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,63,0.1632266640663147
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,63,0.21543999512990317
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,63,0.15942399700482687
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,63,0.21780800819396973
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,63,0.15898666779200235
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,31,0.22781866788864136
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,63,0.21467200915018717
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,63,0.21740265687306723
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,63,0.15774933497111002
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,63,0.15974400440851846
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,31,0.210042675336202
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,63,0.20383999745051065
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,63,0.3288586735725403
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,63,0.12890133261680603
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,63,0.32614932457606
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,63,0.22613332668940225
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,63,0.2304960091908773
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,63,0.1585493286450704
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,63,0.21960532665252686
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,63,0.16080533464749655
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,63,0.15971733132998148
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,63,0.16957332690556845
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,63,0.12654933333396912
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,63,0.2271626591682434
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,63,0.16540267070134482
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,63,0.16029333074887595
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,63,0.21632534265518188
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,63,0.1876586675643921
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,63,0.15990933775901794
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,63,0.22637865940729776
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,63,0.2323039968808492
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,63,0.15637333194414774
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,63,0.15664533774058023
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,63,0.2257279952367147
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,63,0.16435733437538147
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,63,0.15929599603017172
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,63,0.2307466665903727
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,63,0.22470933198928833
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,63,0.16285866498947144
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,63,0.23658132553100586
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,63,0.15986667076746622
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,63,0.2323840061823527
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,63,0.19950934251149496
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,63,0.20548266172409058
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,63,0.1574986676375071
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,63,0.16059199968973795
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,63,0.17841066916783652
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,63,0.16758400201797485
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,63,0.7976906299591064
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,63,0.22778666019439697
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,63,0.1636319955190023
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,63,0.22953067223230997
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,63,0.12315733234087627
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,63,0.1768266757329305
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,63,0.1305333375930786
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,63,0.1807146668434143
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,63,0.13166933258374533
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,63,0.2331626613934835
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,63,0.15877866744995117
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,63,0.1681279937426249
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,63,0.23342400789260864
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,63,0.10962133606274922
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,63,0.166485329469045
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,63,0.22740799188613892
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,63,0.1637173295021057
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,63,0.19436800479888916
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,63,0.16355199615160623
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,63,0.16294399897257486
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,63,0.1604426701863607
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,63,0.2163626750310262
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,63,0.22424532969792685
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,63,0.16545066237449646
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,63,0.17000534137090048
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,63,0.2241493264834086
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,63,0.16401599844296774
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,63,0.30966933568318683
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,63,0.36103467146555585
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,63,0.23695999383926392
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,63,0.34005868434906006
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,63,0.1697280009587606
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,63,0.22766399383544922
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,63,0.1975733240445455
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,63,0.13087999820709229
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,63,0.12380266189575195
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,63,0.37643734614054364
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,63,0.2134666641553243
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,63,0.12658666570981345
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,63,0.1630773345629374
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,63,0.1612106661001841
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,63,0.23406932751337686
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,63,0.2296853264172872
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,63,0.210314671198527
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,63,0.13793067137400308
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,63,0.22750399510065714
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,63,0.16394666830698648
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,63,0.22358399629592896
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,63,0.161189337571462
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,63,1.29694398244222
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,63,0.16596800088882446
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,63,0.22444800535837808
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,63,0.12761066357294717
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,63,0.22828267018000284
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,63,0.2330026626586914
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,63,0.16520532965660095
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,63,0.16200000047683716
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,63,0.48252801100413006
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,63,0.15834133823712668
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,63,0.2070186734199524
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,63,0.12946666280428568
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,63,0.19873599211374918
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,63,0.16778133312861124
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,63,0.24129066864649454
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,63,0.7998879750569662
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,63,0.2068906625111898
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,63,0.12024000287055969
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,63,0.2529333432515462
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,63,0.16268266240755716
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,63,0.1823199987411499
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,63,0.1630506714185079
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,63,0.2188053329785665
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,63,0.12180266777674358
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,63,0.22373332579930624
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,63,0.1690773367881775
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,63,0.2058133284250895
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,63,0.2531306743621826
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,63,0.17570666472117105
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,63,0.19632534186045328
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,63,0.22432533899943033
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,63,0.16992000738779703
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,63,0.16617066661516824
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,63,0.20523732900619507
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,63,0.22989332675933838
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,63,0.1320853332678477
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,63,0.22750399510065714
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,63,0.16271467010180155
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,63,0.17371733983357748
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,63,0.3938506841659546
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,63,0.31591999530792236
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,63,0.1604639987150828
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,63,0.1630880037943522
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,63,0.4703199863433838
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,63,0.3370826641718547
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,63,0.19830399751663208
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,63,0.2608799934387207
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,63,0.16401066382726034
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,63,0.22924266258875528
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,63,0.13191466530164084
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,63,0.1746506690979004
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,63,0.16053332885106406
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,63,0.22315732638041177
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,63,0.12652800480524698
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,63,0.23030932744344076
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,63,0.14525333046913147
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,63,0.27326399087905884
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,63,0.29012266794840497
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,63,0.20223466555277506
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,63,0.3330880006154378
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,63,0.19087467590967813
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,63,0.8755466938018799
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,63,0.5000480016072592
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,63,0.13766933480898538
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,63,0.2265173395474752
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,63,0.2999839981396993
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,63,0.23804799715677896
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,63,0.16658666729927063
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,63,0.1666933298110962
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,63,0.16816532611846924
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,63,0.22827200094858804
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,63,0.17857066790262857
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,127,0.19065600633621216
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,127,1.1689759890238445
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,127,0.17378133535385132
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,127,0.2232053279876709
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,127,0.15762666861216226
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,63,0.22698134183883667
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,127,0.16194132963816324
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,127,0.23946134249369302
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,127,0.1739679972330729
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,127,0.19446400801340738
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,127,0.2455199956893921
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,127,0.15730667114257812
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,127,0.21541333198547363
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,127,0.17401599884033203
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,127,0.21426665782928467
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,127,0.16100800037384033
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,127,0.20298665761947632
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,127,0.16005333264668783
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,127,0.17834667364756265
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,127,0.22830933332443237
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,127,0.2214240034421285
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,127,0.16057599584261575
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,127,0.23721599578857422
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,127,0.2307413419087728
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,127,0.15733333428700766
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,127,0.14754666884740195
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,63,0.20730666319529215
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,63,0.16402666767438254
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,127,0.22990399599075317
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,127,0.15959466497103372
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,127,0.9074079990386963
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,127,0.11060266693433125
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,127,0.16537599762280783
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,127,0.17652267217636108
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,127,0.2037973403930664
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,127,0.16194132963816324
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,127,0.23046932617823282
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,127,0.2404693365097046
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,127,0.1593280037244161
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,127,0.16426133116086325
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,127,0.15708266695340475
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,127,0.22131733099619547
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,127,0.22004799048105875
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,127,0.16179733475049338
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,127,0.224671999613444
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,127,0.16642133394877115
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,127,0.1650986671447754
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,127,0.15982932845751444
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,127,0.2264960010846456
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,127,0.21929067373275757
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,127,0.16218666235605875
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,127,0.10990933577219646
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,127,0.19594132900238037
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,127,0.17413334051767984
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,127,0.230512003103892
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,127,0.16220266620318094
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,127,0.23495999972025552
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,127,0.21922133366266885
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,127,0.12430933117866516
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,127,0.16126933693885803
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,127,0.27422932783762616
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,127,0.23521600166956583
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,127,0.16602133711179098
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,127,0.162282665570577
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,127,0.21993066867192587
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,127,0.17062934239705405
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,127,0.20492267608642578
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,127,0.1670666734377543
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,127,0.4638933340708415
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,127,0.2310240070025126
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,127,0.2254400054613749
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,127,0.16312000155448914
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,127,0.18181333939234415
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,127,0.16219199697176614
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,127,0.16125333309173584
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,127,0.17806400855382284
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,127,0.2230186661084493
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,127,0.16194666425387064
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,127,0.22418665885925293
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,127,0.16268266240755716
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,127,0.1595146656036377
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,127,0.22137065728505453
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,127,0.10591999689737956
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,127,0.23011734088261923
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,127,0.17130666971206665
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,127,0.15893333156903586
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,127,0.15920000274976095
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,127,0.2326080004374186
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,127,0.17613333463668823
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,127,0.16435199975967407
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,127,0.22327999273935953
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,127,0.1735466718673706
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,127,0.22642666101455688
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,127,0.1596213380495707
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,127,0.16634133458137512
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,127,0.194922665754954
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,127,0.23453332980473837
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,127,0.1602026621500651
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,127,0.1622773309548696
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,127,0.16164799531300864
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,127,0.32656532526016235
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,127,0.702234665552775
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,127,0.2349546750386556
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,127,0.15989866852760315
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,127,0.22964266935984293
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,127,0.1567520002524058
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,127,0.2163040041923523
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,127,0.16476266582806906
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,127,0.19818667570749918
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,127,0.12890133261680603
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,127,0.2315466602643331
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,127,0.16362667083740234
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,127,0.16201600432395935
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,127,0.20563733577728271
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,127,0.22633065780003866
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,127,0.16107733050982156
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,127,0.1461120049158732
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,127,0.17271467049916586
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,127,0.16106667121251425
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,127,0.20738667249679565
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,127,0.2328746716181437
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,127,0.12738666931788126
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,127,0.22968000173568726
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,127,0.1602133313814799
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,127,0.22248532374699911
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,127,0.16528000434239706
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,127,0.16275733709335327
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,127,0.20637333393096924
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,127,0.22202666600545248
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,127,0.20737600326538086
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,127,0.1304266651471456
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,127,0.19012266397476196
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,127,0.1685439944267273
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,127,0.22954134146372476
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,127,0.2676960031191508
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,127,0.16421866416931152
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,127,0.2234613299369812
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,127,0.16430933276812235
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,127,0.1782240072886149
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,127,0.13102400302886963
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,127,0.22623467445373535
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,127,0.22233599424362183
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,127,0.1628320018450419
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,127,0.16299733519554138
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,127,0.22781866788864136
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,127,0.19244267543156943
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,127,0.16370667020479837
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,127,0.4902506669362386
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,127,0.1776640017827352
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,127,1.5218133926391602
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,127,0.2856053312619527
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,127,0.3583039840062459
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,127,0.1938986579577128
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,127,0.13562132914861044
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,127,0.225983997186025
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,127,0.22075732549031576
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,127,0.16527466972668967
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,127,0.16365333398183188
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,127,0.16129600008328757
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,127,0.23015467325846353
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,127,0.1818186640739441
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,127,0.1662613352139791
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,127,0.2050079902013143
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,127,0.3582613468170166
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,127,0.9033066431681315
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,127,0.5252480109532675
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,127,0.21980265776316324
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,127,0.17962133884429932
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,127,0.3245973388353984
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,127,0.23430399099985758
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,127,0.17613865931828818
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,127,0.16964266697565714
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,127,0.23784534136454263
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,127,0.1693440079689026
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,127,0.2351413369178772
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,127,0.17068799336751303
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,255,0.1588159998257955
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,255,0.13723733027776083
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,255,0.17550400892893472
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,255,0.16076266765594482
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,255,0.15872533122698465
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,255,0.2073919971783956
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,127,0.42132798830668133
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,255,0.16544000307718912
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,255,0.2214240034421285
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,255,0.21567465861638388
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,127,0.2121493419011434
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,255,0.15955733259518942
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,255,0.1675893266995748
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,255,0.19479467471440634
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,127,0.13762666781743368
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,255,0.22386133670806885
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,255,0.16120533148447672
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,255,0.15415466825167337
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,255,0.198634664217631
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,255,0.15960533420244852
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,255,0.1585493286450704
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,255,0.16065067052841187
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,255,0.22766933838526407
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,255,0.16243732968966165
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,255,0.23530133565266928
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,255,0.1262453297773997
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,255,0.2265440026919047
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,255,0.16009599963823953
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,255,0.22511466344197592
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,255,0.15607466300328574
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,255,0.1627893348534902
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,255,0.23111999034881592
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,255,0.22929600874582926
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,255,0.1509760022163391
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,255,0.16865599155426025
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,255,0.1612320045630137
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,255,0.16280532876650491
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,255,0.20824533700942993
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,255,0.22252267599105835
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,255,0.17690134048461914
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,255,0.22433600823084512
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,255,0.1550826629002889
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,255,0.2209493319193522
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,255,0.1569439967473348
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,255,0.15918399890263876
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,255,0.2205280065536499
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,255,0.21842666467030844
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,255,0.16306666533152261
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,255,0.936901330947876
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,255,0.15458133816719055
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,255,0.1734666625658671
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,255,0.16025599837303162
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,255,0.8283039728800455
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,255,0.2305013338724772
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,255,0.17831466595331827
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,255,0.1650826632976532
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,255,0.23335466782251993
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,255,0.12265599767367046
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,255,0.22344533602396646
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,255,0.1586026648680369
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,255,0.22805333137512207
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,255,0.15980799992879233
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,255,0.22353599468866983
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,255,0.15924266974131265
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,255,0.15757333238919577
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,255,0.22955199082692465
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,255,0.20377600193023682
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,255,0.1609653333822886
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,255,0.10923199852307637
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,255,0.1606666644414266
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,255,0.41619733969370526
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,255,0.16012799739837646
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,255,0.22472000122070312
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,255,0.16607999801635742
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,255,0.22196799516677856
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,255,0.15736533204714456
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,255,0.15843199690183005
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,255,0.22589333852132162
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,255,0.15846400459607443
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,255,0.2327786684036255
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,255,0.2569440007209778
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,255,0.16185067097345987
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,255,0.20355733235677084
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,255,0.10988266269365947
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,255,0.15852266550064087
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,255,0.15658666690190634
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,255,0.23066665728886923
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,255,0.16280532876650491
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,255,0.22954134146372476
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,255,0.16134400169054666
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,255,0.23219732443491617
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,255,0.16153599818547568
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,255,0.147189329067866
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,255,0.16085867087046304
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,255,0.22514132658640543
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,255,0.1302826702594757
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,255,0.22564266125361124
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,255,0.15102400382359824
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,255,0.21042132377624512
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,255,0.10455466310183208
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,255,0.17812800407409668
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,255,0.15938133001327515
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,255,0.22999467452367148
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,255,0.16302399833997092
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,255,0.18151466051737467
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,255,0.13153066237767538
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,255,0.1478613317012787
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,255,0.3929706811904907
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,255,0.3726346492767334
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,255,0.16426666577657065
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,255,0.23582400878270468
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,255,0.1602133313814799
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,255,0.18262932697931925
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,255,0.20546134312947592
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,255,0.16275733709335327
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,255,0.11252267162005107
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,255,0.1569546659787496
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,255,0.1618933379650116
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,255,0.18149334192276
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,255,0.13300800323486328
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,255,0.22574400901794434
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,255,0.16410133242607117
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,255,0.2484053373336792
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,255,0.1322719951470693
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,255,0.23478933175404867
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,255,0.16291200121243796
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,255,0.18414932489395142
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,255,0.2257759968439738
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,255,0.2234826683998108
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,255,0.20498132705688477
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,255,0.16166399916013083
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,255,0.18888000647226968
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,255,0.22989332675933838
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,255,0.2696586648623149
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,255,0.21387199560801187
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,255,0.16481600205103555
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,255,0.2302079995473226
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,255,0.16035733620325723
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,255,0.22882133722305298
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,255,0.11869866649309795
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,255,0.16276266177495322
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,255,0.22712532679239908
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,255,0.22761066754659018
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,255,0.22658133506774902
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,255,0.16335999965667725
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,255,0.13013866543769836
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,255,0.2132800022761027
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,255,0.37569065888722736
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,255,0.49937601884206134
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,255,0.2988160053888957
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,255,0.23583465814590454
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,255,0.18134399255116782
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,255,0.16914665699005127
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,255,0.2407039999961853
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,255,0.23698665698369345
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,255,0.1676106651624044
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,255,0.22496533393859863
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,255,0.1320799986521403
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,255,0.18370666106541952
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,255,0.1692906618118286
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,255,0.23405333360036215
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,255,0.16484799981117249
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,255,1.5633920033772786
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,255,0.41422398885091144
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,255,0.9413866996765137
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,255,0.37006934483846027
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,255,0.5691306591033936
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,255,0.2712000012397766
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,255,0.18822934230168661
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,255,0.2137599984804789
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,255,0.27607999245325726
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,255,0.17652799685796103
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,255,0.17773334185282388
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,255,0.25204267104466754
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,255,0.22268267472585043
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,255,0.1811199982961019
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,255,0.24337599674860635
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,511,0.16158933440844217
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,255,0.1778986652692159
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,511,0.21010132630666098
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,511,0.15542399883270264
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,511,0.21335466702779135
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,511,0.2227519949277242
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,511,0.13249599933624268
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,511,0.15773866573969522
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,511,0.2161653240521749
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,511,0.16459199786186218
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,511,0.2143253286679586
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,511,0.15979199608167013
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,511,0.2395253380139669
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,511,0.12946133812268576
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,511,0.21626132726669312
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,511,0.16521599888801575
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,511,0.2063680092493693
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,511,0.15947733322779337
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,511,0.16003732879956564
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,511,0.17180800437927246
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,511,0.22217599550882974
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,511,0.16588266690572104
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,511,0.22581332921981812
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,511,0.16848532358805338
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,511,0.22980799277623495
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,511,0.22659732898076376
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,511,0.17494400342305502
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,255,0.22256000836690268
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,511,0.13885866602261862
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,511,0.1843199928601583
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,511,0.1666719913482666
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,511,0.20356800158818564
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,511,0.15945067008336386
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,511,0.20457067092259726
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,511,0.10732799768447876
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,511,0.23093867301940918
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,511,0.1632480025291443
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,511,0.24434133370717367
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,511,0.16591466466585794
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,511,0.22248532374699911
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,511,0.16376533110936484
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,511,0.150325338045756
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,511,0.22835199038187662
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,511,0.7900746663411459
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,511,0.20257065693537393
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,511,0.2214346726735433
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,511,0.22451200087865195
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,511,0.22110400597254434
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,511,0.16247466206550598
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,511,0.15973333517710367
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,511,0.10774399836858113
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,511,0.24207466840744019
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,511,0.4293973445892334
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,511,0.14260799686113992
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,511,0.23401600122451782
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,511,0.1299679974714915
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,511,0.2158986727396647
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,511,0.22817599773406982
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,511,0.16565333803494772
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,511,0.23612266778945923
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,511,0.12967466314633688
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,511,0.16193067034085593
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,511,0.2314079999923706
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,511,0.15573867162068686
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,511,0.23353066047032675
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,511,0.21244800090789795
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,511,0.1172266701857249
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,511,0.2351093292236328
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,511,0.1604213317235311
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,511,0.22822399934132895
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,511,0.16422399878501892
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,511,0.22566932439804077
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,511,0.16501866777737936
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,511,0.22798399130503336
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,511,0.16514666875203451
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,511,0.2283253272374471
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,511,0.15890666842460632
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,511,0.23797865708669028
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,511,0.1641333301862081
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,511,0.22466667493184408
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,511,0.16182933251063028
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,511,0.16150933504104614
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,511,0.11424533526102702
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,511,0.16666666666666666
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,511,0.2649066646893819
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,511,0.16110933820406595
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,511,0.23848533630371094
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,511,0.16217600305875143
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,511,0.22781866788864136
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,511,0.13221866885821024
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,511,0.2296853264172872
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,511,0.24674665927886963
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,511,0.16698133945465088
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,511,0.23092265923817953
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,511,0.16318399707476297
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,511,0.23020267486572266
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,511,0.16694400707880655
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,511,0.20915200312932333
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,511,0.1013759970664978
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,511,0.18446399768193564
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,511,0.16292799512545267
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,511,0.22406399250030518
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,511,0.16132266322771707
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,511,0.1895680030186971
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,511,0.16199466586112976
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,511,0.2265173395474752
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,511,0.16791999340057373
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,511,0.2145599921544393
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,511,0.22703999280929565
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,511,0.21267733971277872
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,511,0.16928533713022867
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,511,0.2278346618016561
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,511,0.16296000281969705
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,511,0.21076265970865884
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,511,0.21481066942214966
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,511,0.16176533699035645
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,511,0.16620266437530518
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,511,0.1725813349088033
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,511,0.22475733359654745
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,511,0.2226240038871765
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,511,0.13928000132242838
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,511,0.23094934225082397
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,511,0.1593226691087087
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,511,0.2336639960606893
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,511,0.16427200039227804
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,511,0.22376533349355063
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,511,0.16128533085187277
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,511,0.23203732570012411
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,511,0.1639253298441569
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,511,0.20357867081960043
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,511,0.27198400100072223
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,511,0.29198400179545086
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,511,0.21849066019058228
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,511,0.1498186687628428
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,511,0.22829866409301758
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,511,0.16661866505940756
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,511,0.17113600174585977
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,511,0.16268266240755716
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,511,0.21215999126434326
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,511,0.2288960019747416
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,511,0.1662720044453939
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,511,0.23377599318822226
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,511,0.16577600439389548
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,511,0.22579199075698853
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,511,0.2108373244603475
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,511,0.16622400283813477
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,511,0.4491413434346517
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,511,0.2961653272310893
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,511,0.29527999957402545
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,511,0.5371893246968588
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,511,0.3405333360036214
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,511,0.2493600050608317
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,511,0.2058239976565043
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,511,0.24267200628916422
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,511,0.19268266359965006
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,511,0.23840532700220743
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,511,0.1781546672185262
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,511,0.23296000560124716
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,511,0.23369600375493368
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,511,0.18061333894729614
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,511,0.1739199956258138
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,511,0.21967466672261557
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,511,0.5330186684926351
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,511,1.0363946755727131
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,511,0.3709546724955241
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,511,0.32118932406107586
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,511,0.47520001729329425
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,511,0.3855466842651367
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,511,0.6720053354899088
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,511,0.29527467489242554
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,511,0.27673067649205524
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,1023,0.09874133268992107
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,511,0.27056000630060834
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,511,0.35780266920725506
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,1023,0.13747732837994894
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,1023,0.1593119998772939
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,1023,0.2161066730817159
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,1023,0.16089600324630737
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,511,0.26714134216308594
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,511,0.33746667702992755
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,511,0.984831968943278
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,1023,0.15915200114250183
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,1023,0.21886932849884033
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,511,0.26606933275858563
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,511,0.31705600023269653
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,1023,0.2242506742477417
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,1023,0.22210667530695596
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,1023,0.16662933429082236
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,1023,0.15858667095502219
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,1023,0.2201919953028361
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,1023,0.15743466218312582
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,1023,0.16011733810106912
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,1023,0.21604265769322714
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,1023,0.20334400733311972
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,1023,0.15684266885121664
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,1023,0.23338133096694946
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,1023,0.16386133432388306
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,1023,0.23493866125742593
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,1023,0.162581334511439
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,1023,0.23206400871276855
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,1023,0.16262933611869812
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,1023,0.2239146629969279
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,1023,0.12627733747164407
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,1023,0.2221333384513855
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,1023,0.16768000523249307
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,1023,0.23067732652028403
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,1023,0.1737066706021627
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,1023,0.2255893349647522
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,1023,0.1585706671079
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,1023,0.26175467173258465
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,1023,0.31591467062632245
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,1023,0.15997866789499918
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,1023,0.24016533295313516
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,1023,0.23148266474405924
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,1023,0.16375466187795004
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,1023,0.22190932432810465
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,1023,0.1607093314329783
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,1023,0.15752533078193665
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,1023,0.224671999613444
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,1023,0.22498132785161337
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,1023,0.23734933137893677
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,1023,0.16561599572499594
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,1023,0.1599999964237213
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,1023,0.22664533058802286
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,1023,0.17058134078979492
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,1023,0.20339733362197876
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,1023,0.18918933471043906
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,1023,0.23737066984176636
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,1023,0.1643786629041036
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,1023,0.22403732935587564
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,1023,0.16403733690579733
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,1023,0.23271999756495157
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,1023,0.22674665848414102
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,1023,0.12432000041007996
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,1023,0.16588266690572104
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,1023,0.2290239930152893
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,1023,0.15820800264676413
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,1023,0.22488532463709512
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,1023,0.16875199476877847
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,1023,0.1604586640993754
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,1023,0.22416534026463827
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,1023,0.20760534207026163
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,1023,0.1686026652654012
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,1023,0.16169066230456033
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,1023,0.22060267130533853
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,1023,0.1723733345667521
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,1023,0.1264479955037435
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,1023,0.16949333747227988
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,1023,0.22869332631429037
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,1023,0.7564160029093424
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,1023,0.16330132881800333
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,1023,0.18110400438308716
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,1023,0.16492266456286112
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,1023,0.23215466737747192
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,1023,0.163674662510554
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,1023,0.22723732391993204
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,1023,0.16582933068275452
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,1023,0.20918399095535278
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,1023,0.16530133287111917
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,1023,0.14751467108726501
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,1023,0.23739200830459595
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,1023,0.55894935131073
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,1023,0.17030400037765503
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,1023,0.16269866625467935
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,1023,0.18468799193700156
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,1023,0.16772266228993735
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,1023,0.22973867257436117
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,1023,0.2227733333905538
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,1023,0.8603520393371582
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,1023,0.22168533007303873
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,1023,0.17061332861582437
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,1023,0.23417067527770996
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,1023,0.16505600015322366
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,1023,0.21046932538350424
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,1023,0.1329813301563263
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,1023,0.22641066710154215
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,1023,0.23131734132766724
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,1023,0.16598400473594666
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,1023,0.16953599452972412
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,1023,0.2385866641998291
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,1023,0.1662773291269938
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,1023,0.23715200026830038
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,1023,0.17139732837677002
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,1023,0.22217067082722983
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,1023,0.13432533542315164
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,1023,0.22832000255584717
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,1023,0.15967466433842978
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,1023,0.16470932960510254
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,1023,0.2112213373184204
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,1023,0.17856534322102866
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,1023,0.1453546682993571
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,1023,0.40249598026275635
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,1023,0.1313920021057129
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,1023,0.16890132427215576
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,1023,0.2341759999593099
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,1023,0.23323732614517212
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,1023,0.168229341506958
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,1023,0.23244265715281168
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,1023,0.16083733240763345
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,1023,0.22897066672643027
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,1023,0.16245866815249124
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,1023,0.18886399269104004
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,1023,0.23241599400838217
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,1023,0.1638879974683126
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,1023,0.1641866664091746
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,1023,0.20512000719706217
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,1023,0.28329066435496014
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,1023,0.20385066668192545
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,1023,0.3211626609166463
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,1023,0.24119466543197632
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,1023,0.18125865856806436
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,1023,0.23938133319218954
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,1023,0.17116800944010416
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,1023,0.22189333041508993
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,1023,0.1726133426030477
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,1023,0.19766932725906372
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,1023,0.17382399241129556
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,1023,0.23748266696929932
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,1023,0.17515732844670615
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,1023,0.2211946646372477
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,1023,0.16422933340072632
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,1023,0.21066133181254068
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,1023,0.5550293525060018
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,1023,0.4258506695429484
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,1023,0.39403732617696124
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,1023,0.33055466413497925
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,1023,0.618559996287028
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,1023,0.3277920087178548
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,1023,0.2961440086364746
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,1023,0.3012426694234212
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,1023,0.2669173280398051
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,1023,0.27873067061106366
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,1023,0.2695840001106262
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,1023,0.2730506658554077
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,1023,0.2663840055465698
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,1023,0.30558399359385174
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,1023,0.27811199426651
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,1023,0.24859732389450073
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,1023,0.7569759686787924
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,1023,0.5266720056533813
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,1023,0.49665598074595135
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,1023,1.2527519861857097
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,1023,0.5774133205413818
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,1023,0.8605706691741943
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,1023,0.6636799971262614
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,1023,0.5817439953486124
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,1023,0.47995201746622723
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,2047,0.1508746643861135
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,1023,0.5458399852116903
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,1023,0.4713493188222249
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,1023,0.5293706655502319
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,1023,0.4659359852472941
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,1023,0.5196906725565592
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,1023,0.8322347005208334
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,2047,0.5578506787618002
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,2047,0.16176533699035645
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,2047,0.17306133111317953
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,1023,0.5073813199996948
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,2047,0.1604373355706533
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,2047,0.22155199448267618
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,2047,0.22248532374699911
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,2047,0.15944000085194907
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,2047,0.22263999780019125
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,2047,0.21345599492390951
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,2047,0.16313067078590393
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,2047,0.1666826605796814
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,2047,0.21787200371424356
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,2047,0.22748800118764242
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,2047,0.1581653356552124
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,2047,0.21306133270263672
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,2047,0.16564800341924033
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,2047,0.16641066471735635
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,2047,0.23305600881576538
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,2047,0.1835306684176127
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,2047,0.16727999846140543
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,2047,0.18050134181976318
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,2047,0.16280532876650491
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,2047,0.17682133118311563
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,2047,0.16707199811935425
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,2047,0.24661866823832193
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,2047,0.16613333423932394
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,2047,0.19740267594655356
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,2047,0.22723732391993204
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,2047,0.1625226636727651
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,2047,0.1731520096460978
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,2047,0.16674667596817017
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,2047,0.12688000003496805
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,2047,0.16771199305852255
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,2047,0.2311413288116455
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,2047,0.23527467250823975
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,2047,0.40192532539367676
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,2047,0.16556266943613687
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,2047,0.1735466718673706
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,2047,0.23492799202601114
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,2047,0.23493866125742593
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,2047,0.16249600052833557
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,2047,0.18224000930786133
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,2047,0.1653600037097931
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,2047,0.16132799784342447
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,2047,0.20605866114298502
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,2047,0.2276159922281901
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,2047,0.17613333463668823
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,2047,0.15833066900571188
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,2047,0.13893866539001465
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,2047,0.22495466470718384
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,2047,0.18862400452295938
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,2047,0.1646506687005361
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,2047,0.20722132921218872
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,2047,0.16870933771133423
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,2047,0.22644267479578653
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,2047,0.1646666626135508
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,2047,0.2193173368771871
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,2047,0.1604106624921163
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,2047,0.23107200860977173
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,2047,0.16288000345230103
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,2047,0.2422879934310913
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,2047,0.16065067052841187
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,2047,0.20882133642832437
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,2047,0.14407466848691305
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,2047,0.31070399284362793
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,2047,0.16366933782895407
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,2047,0.22884267568588257
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,2047,0.16902933518091837
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,2047,0.226090669631958
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,2047,0.1602079967657725
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,2047,0.22842667500178018
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,2047,0.16537066300710043
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,2047,0.22978132963180542
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,2047,0.1569653352101644
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,2047,0.1609546641508738
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,2047,0.2460319995880127
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,2047,0.2303946614265442
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,2047,0.159578671058019
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,2047,0.2103839914004008
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,2047,0.16921599706013998
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,2047,0.1622666617234548
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,2047,0.21227733294169107
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,2047,0.220143993695577
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,2047,0.2272746761639913
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,2047,0.16171733538309732
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,2047,0.16100800037384033
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,2047,0.2242506742477417
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,2047,0.1678666671117147
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,2047,0.23330666621526083
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,2047,0.1639199952284495
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,2047,0.22360533475875854
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,2047,0.12818666299184164
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,2047,0.2278239925702413
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,2047,0.15785599748293558
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,2047,0.20801599820454916
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,2047,0.1270133356253306
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,2047,0.24183466037114462
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,2047,0.22906132539113364
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,2047,0.13677866260210672
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,2047,0.13132266203562418
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,2047,0.2321760058403015
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,2047,0.8352746963500977
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,2047,0.16076800227165222
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,2047,0.23854400714238486
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,2047,0.899733304977417
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,2047,0.1606613298257192
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,2047,0.24507200717926025
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,2047,0.16221867005030313
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,2047,0.22617600361506143
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,2047,0.18211734294891357
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,2047,0.1954773267110189
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,2047,0.23467199007670084
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,2047,0.2142933408419291
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,2047,0.17588800191879272
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,2047,0.19729065895080566
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,2047,0.2251573403676351
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,2047,0.16929600636164346
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,2047,0.14682132999102274
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,2047,0.20054932435353598
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,2047,0.17638399203618368
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,2047,0.17271467049916586
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,2047,0.23964800437291464
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,2047,0.23251734177271524
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,2047,0.2295466661453247
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,2047,0.1720906694730123
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,2047,0.17268800735473633
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,2047,0.1585919956366221
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,2047,0.3970453341801961
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,2047,0.3938239812850952
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,2047,0.3041920065879822
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,2047,0.3031733234723409
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,2047,0.2815679907798767
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,2047,0.2582933306694031
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,2047,0.2688266634941101
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,2047,0.24212799469629923
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,2047,0.2629706660906474
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,2047,0.2457546591758728
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,2047,0.22962667544682822
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,2047,0.25941866636276245
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,2047,0.2566399971644084
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,2047,0.22154132525126138
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,2047,0.2182719906171163
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,2047,0.25498666365941364
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,2047,0.7718986670176188
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,2047,0.5963786840438843
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,2047,0.8365653355916342
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,2047,0.5320320129394531
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,2047,0.6502453486124674
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,2047,0.5500479936599731
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,2047,0.5021386543909708
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,2047,0.5182666778564453
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,2047,0.4858826796213786
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,2047,0.5000853141148885
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,2047,0.4714826742808024
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,2047,1.4069493611653645
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,2047,0.5205653508504232
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,2047,0.48900266488393146
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,2047,0.4673759937286377
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,2047,0.4806400140126546
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,2047,0.9811840057373047
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,2047,0.924506664276123
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,2047,1.2509439786275227
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,2047,1.053605318069458
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,2047,1.6013813018798828
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,2047,1.193930705388387
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,2047,0.9663786888122559
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,2047,0.8977759679158529
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,2047,0.9278240203857422
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,2047,0.873253345489502
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,4095,0.11913599570592244
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,4095,0.21291200319925943
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,4095,0.15796800454457602
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,2047,0.8644213676452637
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,4095,0.21407467126846313
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,2047,1.3614452679951985
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,4095,0.16119999686876932
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,2047,0.9045066833496094
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,2047,0.8591732978820801
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,4095,0.21825599670410156
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,4095,0.20643732945124307
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,4095,0.22116265694300333
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,4095,0.16321067015329996
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,4095,0.22196267048517862
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,4095,0.15943466623624167
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,4095,0.20805333058039346
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,4095,0.21869866053263345
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,2047,0.8574026425679525
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,4095,0.22138667106628418
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,4095,0.16420267025629678
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,4095,0.2108746568361918
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,2047,0.8933173020680746
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,4095,0.15853333473205566
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,4095,0.13124799728393555
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,4095,0.2827146649360657
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,4095,0.22539732853571573
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,4095,0.1627840002377828
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,4095,0.22397865851720175
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,4095,0.2302239934603373
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,4095,0.2087786595026652
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,4095,0.16761600971221924
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,4095,0.5744266510009766
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,4095,0.2699573238690694
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,4095,0.22813334067662558
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,4095,0.1669173240661621
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,4095,0.7915306886037191
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,4095,0.8311413129170736
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,4095,0.2125599980354309
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,4095,0.22434133291244507
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,4095,0.2323626677195231
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,4095,0.1836479902267456
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,4095,0.1712053418159485
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,4095,0.6138399839401245
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,4095,0.2265226642290751
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,4095,0.16487999757130942
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,4095,0.23563732703526816
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,4095,0.16106667121251425
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,4095,0.2278719941775004
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,4095,0.17514665921529135
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,4095,0.22671467065811157
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,4095,0.16155733664830527
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,4095,0.16149333119392395
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,4095,0.2181439995765686
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,4095,0.2100106676419576
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,4095,0.10161067048708598
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,4095,0.23769599199295044
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,4095,0.16754666964213052
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,4095,0.23228800296783447
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,4095,0.1373599966367086
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,4095,0.23483733336130777
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,4095,0.12453866998354594
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,4095,0.24458666642506918
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,4095,0.13427733381589255
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,4095,0.22988800207773843
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,4095,0.2366559902826945
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,4095,0.16084800163904825
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,4095,0.17533334096272787
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,4095,0.2325119972229004
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,4095,0.1539359986782074
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,4095,0.21741332610448202
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,4095,0.1536906659603119
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,4095,0.15387733777364096
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,4095,0.16698133945465088
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,4095,0.18111467361450195
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,4095,0.22932799657185873
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,4095,0.22798399130503336
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,4095,0.15659733613332114
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,4095,0.1851466695467631
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,4095,0.16528532902399698
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,4095,0.23053866624832153
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,4095,0.1313920021057129
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,4095,0.17987199624379477
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,4095,0.17723733186721802
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,4095,0.22323733568191528
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,4095,0.20853867133458456
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,4095,0.15822933117548624
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,4095,0.16502933700879416
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,4095,0.38893866539001465
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,4095,0.13633599877357483
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,4095,0.2494773268699646
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,4095,0.15940266847610474
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,4095,0.23089599609375
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,4095,0.16737600167592367
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,4095,0.16487999757130942
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,4095,0.23214934269587198
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,4095,0.1633386711279551
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,4095,0.20738667249679565
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,4095,0.7978986899058024
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,4095,0.23970667521158853
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,4095,0.22639999787012735
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,4095,0.1650879979133606
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,4095,0.16377066572507223
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,4095,0.1804479956626892
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,4095,0.18338666359583536
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,4095,0.23518399397532144
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,4095,0.1835040052731832
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,4095,0.17491199572881064
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,4095,0.1759200096130371
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,4095,0.22835199038187662
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,4095,0.19099199771881104
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,4095,0.17114667097727457
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,4095,0.24185067415237427
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,4095,0.17619200547536215
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,4095,0.22529067595799765
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,4095,0.17824532588322958
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,4095,0.228767991065979
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,4095,0.20212799310684204
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,4095,0.1759200096130371
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,4095,0.25085333983103436
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,4095,0.31470932563145954
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,4095,0.27218133211135864
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,4095,0.2185973326365153
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,4095,0.2568639914194743
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,4095,0.23004800081253052
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,4095,0.25566933552424115
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,4095,0.23407999674479166
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,4095,0.2528160015741984
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,4095,0.25011734167734784
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,4095,0.23763734102249146
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,4095,0.23289066553115845
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,4095,0.24938132365544638
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,4095,0.19442667563756308
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,4095,0.22039467096328735
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,4095,0.24726400772730509
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,4095,0.6068319876988729
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,4095,0.643120010693868
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,4095,0.5268533229827881
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,4095,0.505952000617981
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,4095,0.5445599953333536
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,4095,0.46773334344228107
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,4095,0.4816853205362956
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,4095,0.4875626564025879
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,4095,0.4835999806722005
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,4095,0.4630613327026367
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,4095,0.726095994313558
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,4095,0.47706135114034015
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,4095,0.454586664835612
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,4095,0.47037335236867267
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,4095,0.45261867841084796
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,4095,0.4634186824162801
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,4095,1.2086453437805176
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,4095,2.0489652951558432
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,4095,0.9354666868845621
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,4095,0.9933493137359619
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,4095,0.9384640057881674
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,4095,0.9033333460489908
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,4095,1.0283466974894206
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,4095,0.9046613375345866
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,4095,0.8886559804280599
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,4095,0.8859840234120687
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,4095,0.8747573693593343
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,4095,0.8760053316752116
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,4095,0.9068106810251871
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,4095,0.874229351679484
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,4095,0.8651519616444906
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,4095,1.863957405090332
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,4095,2.3670667012532554
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,4095,1.7710132598876953
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,4095,2.054661273956299
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,4095,2.0134719212849936
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,4095,1.703407923380534
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,4095,1.730090618133545
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,4095,1.7370773951212566
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,4095,1.8166613578796387
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,4095,1.6954612731933594
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,4095,1.716325283050537
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,8191,0.16775466998418173
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,8191,0.18385066588719687
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,8191,0.7969333330790201
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,8191,0.21413334210713705
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,8191,0.16335999965667725
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,8191,0.20790932575861612
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,8191,0.16481600205103555
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,8191,0.21495999892552695
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,8191,0.1649279991785685
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,8191,0.208186666170756
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,8191,0.16642666856447855
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,8191,0.21185066302617392
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,8191,0.2132479945818583
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,8191,0.2138026754061381
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,8191,0.1593653361002604
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,8191,0.1935946742693583
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,8191,0.16427200039227804
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,8191,0.18311466773351034
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,8191,0.15403733650843301
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,8191,0.5142026742299398
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,8191,0.16641066471735635
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,8191,0.9000053405761719
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,8191,0.15806399782498678
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,8191,0.22867733240127563
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,8191,0.1539466679096222
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,8191,0.2281493345896403
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,8191,0.15492266416549683
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,8191,0.2294506629308065
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,8191,0.1583039959271749
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,8191,0.2270666758219401
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,8191,0.15441600481669107
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,8191,0.21187732617060342
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,8191,0.15953600406646729
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,8191,0.31143999099731445
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,8191,0.12387200196584065
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,8191,0.29126934210459393
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,8191,0.13467199603716531
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,8191,0.2792213360468547
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,4095,1.6494399706522624
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,8191,0.15706666310628256
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,4095,1.6978880564371746
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,8191,0.27246399720509845
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,4095,1.6561493873596191
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,8191,0.13056000073750815
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,8191,0.276256004969279
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,8191,0.16988267501195273
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,4095,1.654981295267741
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,8191,0.16110933820406595
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,8191,0.27663999795913696
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,8191,0.5405280192693075
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,4095,1.6783626874287922
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,8191,0.1637226641178131
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,8191,0.268010675907135
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,8191,0.1634719967842102
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,4095,1.6484853426615398
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,8191,0.127018670241038
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,8191,0.3104906678199768
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,8191,0.29651200771331787
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,8191,0.2767519950866699
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,8191,0.2937600016593933
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,8191,0.1677066683769226
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,8191,0.1644480029741923
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,8191,0.15793066223462424
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,8191,0.12562666336695352
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,8191,0.2746346592903137
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,8191,0.2773973345756531
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,8191,0.16250133514404297
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,8191,0.2759840091069539
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,8191,0.16566399733225504
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,8191,0.26475199063618976
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,8191,0.2424479921658834
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,8191,0.15866133570671082
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,8191,0.23169066508611044
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,8191,0.16674133141835532
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,8191,0.21850667397181192
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,8191,0.237226665019989
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,8191,0.16787733634312949
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,8191,0.16426133116086325
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,8191,0.23054933547973633
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,8191,0.22423466046651205
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,8191,0.16526933511098227
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,8191,0.19030400117238364
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,8191,0.17126933733622232
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,8191,0.2304426630338033
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,8191,0.17037334044774374
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,8191,0.2047413388888041
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,8191,0.1880906621615092
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,8191,0.22512000799179077
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,8191,0.22750933965047201
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,8191,0.7526613076527914
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,8191,0.18024533987045288
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,8191,0.18026133378346762
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,8191,0.7614239851633707
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,8191,0.16039466857910156
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,8191,0.18761066595713297
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,8191,0.1750826636950175
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,8191,0.22338134050369263
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,8191,0.17550400892893472
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,8191,0.32157333691914874
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,8191,0.1713599960009257
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,8191,0.24543466170628866
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,8191,0.2348960041999817
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,8191,0.28334933519363403
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,8191,0.14882133404413858
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,8191,0.2865813374519348
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,8191,0.22588266928990683
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,8191,0.2762719988822937
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,8191,0.22951465845108032
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,8191,0.27771733204523724
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,8191,0.1863093376159668
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,8191,0.2723466753959656
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,8191,0.2352480093638102
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,8191,0.2715946634610494
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,8191,0.2323413292566935
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,8191,0.3014346758524577
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,8191,0.2363626758257548
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,8191,0.2701333363850911
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,8191,0.18270933628082275
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,8191,0.5368693272272745
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,8191,0.5597653388977051
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,8191,0.5060426791508993
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,8191,0.5107733408610026
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,8191,0.4628533522288005
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,8191,0.4783946673075358
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,8191,0.4543786843617757
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,8191,0.46989333629608154
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,8191,0.4504106839497884
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,8191,0.47487465540568036
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,8191,0.446234663327535
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,8191,0.46908799807230633
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,8191,0.4498666524887085
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,8191,0.45981868108113605
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,8191,0.449285348256429
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,8191,0.45493332544962567
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,8191,1.03383469581604
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,8191,1.0324373245239258
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,8191,0.9051840305328369
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,8191,0.9246933460235596
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,8191,0.8707786401112875
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,8191,0.880138635635376
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,8191,0.8643999894460043
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,8191,0.8923146724700928
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,8191,0.8610773086547852
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,8191,0.8673333326975504
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,8191,0.8548426628112793
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,8191,0.926688035329183
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,8191,1.4134507179260254
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,8191,0.8530293305714926
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,8191,1.4608426094055176
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,8191,0.8468960126241049
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,8191,2.068837324778239
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,8191,1.9848853747049968
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,8191,1.7935733795166016
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,8191,1.7853172620137532
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,8191,1.7286027272542317
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,8191,1.7086453437805176
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,8191,1.7180852890014648
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,8191,1.720373312632243
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,8191,1.6782399813334148
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,8191,1.653978665669759
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,8191,1.667690594991048
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,8191,2.0744959513346353
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,8191,1.6434933344523113
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,8191,1.659813404083252
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,8191,1.661941369374593
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,8191,1.654901345570882
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,8191,3.7846078872680664
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,8191,3.535578727722168
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,8191,3.3596747716267905
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,8191,3.901669184366862
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,8191,3.33079465230306
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,8191,3.3640267054239907
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,8191,3.27344544728597
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,8191,3.2615893681844077
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,8191,3.2154293060302734
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,16383,0.1665279964605967
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,8191,3.247023900349935
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,8191,3.285909334818522
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,16383,0.18706132968266806
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,8191,3.2369066874186196
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,16383,0.16452800234158835
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,16383,0.1613706648349762
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,16383,0.2461706598599752
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,16383,0.16276266177495322
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,16383,0.19936533768971762
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,16383,0.17957866191864014
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,8191,3.209562619527181
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,16383,0.17478932936986288
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,16383,0.21847999095916748
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,16383,0.16331199804941812
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,16383,0.23772267500559488
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,16383,0.1819253365198771
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,16383,0.21651732921600342
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,16383,0.15987733006477356
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,16383,0.20393067598342896
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,16383,0.16674667596817017
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,16383,0.22855466604232788
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,16383,0.16796799500783285
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,16383,0.23574932416280112
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,16383,0.1647040049235026
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,8191,3.236133257548014
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,16383,0.23355199893315634
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,16383,0.16406933466593424
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,16383,0.1797813375790914
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,16383,0.1625226636727651
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,8191,3.2304000854492188
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,16383,1.5697973569234211
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,16383,0.17694934209187826
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,16383,0.23029333353042603
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,16383,0.1646613379319509
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,16383,0.22873065869013467
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,16383,0.21020267407099405
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,8191,3.1717332204182944
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,16383,0.16404267152150473
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,16383,0.14030933380126953
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,16383,0.2174453337987264
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,16383,0.16595199704170227
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,16383,0.23059199253718057
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,16383,0.16665599743525186
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,16383,0.25311466058095294
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,16383,0.31060800949732464
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,16383,0.23287999629974365
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,16383,0.16364799936612448
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,16383,0.22950400908788046
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,16383,0.16312533617019653
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,16383,0.15449600418408713
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,16383,0.23335999250411987
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,16383,0.22815465927124023
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,16383,0.16214399536450705
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,16383,0.21100799242655435
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,16383,0.17428267002105713
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,16383,0.15971733132998148
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,16383,0.23686933517456055
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,16383,0.16500266393025717
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,16383,0.17815999190012613
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,16383,0.1853333314259847
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,16383,0.16847467422485352
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,16383,0.18081599473953247
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,16383,0.18030933539072672
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,16383,0.22966933250427246
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,16383,0.16674133141835532
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,16383,0.2253920038541158
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,16383,0.14959999918937683
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,16383,0.21230934063593546
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,16383,0.22787733872731528
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,16383,0.12829333543777466
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,16383,0.18275733788808188
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,16383,0.1959786613782247
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,16383,0.23544534047444662
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,16383,0.2339359919230143
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,16383,0.17429333925247192
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,16383,0.17601599295934042
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,16383,0.22510933876037598
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,16383,0.2185759941736857
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,16383,0.179258664449056
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,16383,0.17467200756072998
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,16383,0.22801067431767783
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,16383,0.22410666942596436
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,16383,0.22039467096328735
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,16383,0.154448002576828
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,16383,0.17861332496007284
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,16383,0.1724053422609965
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,16383,0.29598400990168255
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,16383,0.5215733448664347
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,16383,0.47408000628153485
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,16383,0.49217065175374347
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,16383,0.2765333255132039
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,16383,0.29688000679016113
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,16383,0.27622934182484943
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,16383,0.3072320024172465
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,16383,0.46488531430562335
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,16383,0.2720693349838257
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,16383,0.46796266237894696
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,16383,0.46613868077596027
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,16383,0.2685386737187703
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,16383,0.47993067900339764
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,16383,0.2736426591873169
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,16383,0.4481866757074992
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,16383,0.5018719832102457
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,16383,0.4891626834869385
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,16383,1.7788532574971516
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,16383,0.5052053531010946
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,16383,0.47385601202646893
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,16383,0.5126773516337076
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,16383,0.5024853150049845
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,16383,0.46036267280578613
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,16383,0.5538666645685831
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,16383,0.4660000006357829
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,16383,0.617248018582662
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,16383,0.5149600108464559
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,16383,0.49593067169189453
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,16383,0.4873173236846924
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,16383,0.9040959676106771
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,16383,0.44944000244140625
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,16383,0.9667306741078695
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,16383,0.9482986927032471
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,16383,0.8721653620402018
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,16383,0.8599466482798258
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,16383,0.859989325205485
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,16383,1.7385973930358887
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,16383,0.893280029296875
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,16383,1.911861260732015
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,16383,0.8688320318857828
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,16383,0.8605493704477946
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,16383,0.8618026574452718
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,16383,0.8527733484903971
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,16383,0.8505973021189371
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,16383,0.8507359822591146
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,16383,0.8471840222676595
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,16383,2.976853370666504
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,16383,1.9002827008565266
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,16383,1.7077706654866536
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,16383,1.7777867317199707
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,16383,1.7510186831156414
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,16383,1.6672426859537761
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,16383,1.6763520240783691
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,16383,1.649338722229004
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,16383,2.0117653210957847
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,16383,1.6928480466206868
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,16383,1.636624018351237
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,16383,1.6494186719258626
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,16383,1.6300106048583984
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,16383,1.690981388092041
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,16383,1.628933270772298
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,16383,1.6545920372009277
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,16383,1.6589172681172688
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,16383,3.498469352722168
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,16383,3.7849067052205405
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,16383,3.3741652170817056
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,16383,3.3650080362955728
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,16383,3.3180745442708335
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,16383,3.2478294372558594
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,16383,3.271498680114746
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,16383,3.222522735595703
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,16383,3.281066576639811
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,16383,3.207477251688639
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,16383,3.2385759353637695
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,16383,3.2440052032470703
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,16383,3.211855888366699
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,16383,3.2789332071940103
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,16383,3.252490679423014
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,16383,3.1620960235595703
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,16383,6.561424255371094
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,16383,7.249237060546875
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,16383,6.955477396647136
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,16383,6.703877131144206
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,16383,6.4899946848551435
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,16383,6.381125132242839
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,16383,6.440799713134766
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,16383,6.332719802856445
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,16383,6.467973073323567
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,32767,0.17134400208791098
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,32767,0.22643200556437174
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,32767,0.21778666973114014
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,32767,0.22586133082707724
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,32767,0.16583466529846191
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,32767,0.22561599810918173
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,32767,0.2916853427886963
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,16383,6.298122406005859
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,32767,0.2285226583480835
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,32767,0.16918933391571045
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,32767,0.22716800371805826
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,32767,0.13431466619173685
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,32767,0.13899200161298117
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,32767,0.23109332720438638
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,32767,0.2297066648801168
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,32767,0.16239999731381735
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,32767,0.21370132764180502
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,32767,0.242576003074646
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,32767,0.1753386656443278
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,32767,0.16511999567349753
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,16383,6.416757583618164
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,32767,0.16939733425776163
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,32767,0.23866132895151773
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,32767,0.233189324537913
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,32767,0.16402666767438254
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,32767,0.22940800587336221
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,32767,0.17031999429066977
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,16383,6.40550422668457
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,32767,0.23219199975331625
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,32767,0.16210666298866272
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,32767,0.19549866517384848
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,32767,0.16270933548609415
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,32767,0.22035199403762817
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,32767,0.27353066205978394
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,32767,0.22615466515223184
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,32767,0.1707520087560018
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,16383,6.394314448038737
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,32767,0.230186661084493
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,32767,0.369759996732076
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,32767,0.3118026653925578
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,32767,0.13769599795341492
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,32767,0.23137599229812622
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,16383,6.395029067993164
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,32767,0.17414400974909464
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,32767,0.6794880231221517
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,32767,0.3114560047785441
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,32767,0.16582399606704712
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,32767,0.1367733379205068
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,32767,0.1653600037097931
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,16383,6.29634157816569
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,32767,0.2400160034497579
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,32767,0.23619733254114786
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,32767,0.16484799981117249
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,16383,6.366645177205403
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,32767,0.1672853430112203
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,32767,0.17391467094421387
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,32767,0.839733362197876
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,32767,0.17701866229375204
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,32767,0.9273227055867513
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,32767,0.9343252976735433
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,32767,0.9189653396606445
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,32767,0.17200533548990884
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,32767,0.1690346598625183
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,32767,0.9220693111419678
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,32767,0.1670666734377543
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,32767,0.9116960366566976
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,32767,0.9215146700541178
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,32767,0.17013333241144815
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,32767,0.16525333126386008
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,32767,0.9209173520406088
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,32767,0.9038560390472412
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,32767,0.28524800141652423
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,32767,0.28008000055948895
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,32767,0.9425439834594727
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,32767,0.27423999706904095
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,32767,0.9290719827016195
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,32767,0.9377013047536215
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,32767,0.2703253428141276
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,32767,0.3028320074081421
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,32767,0.27187732855478924
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,32767,0.9067520300547282
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,32767,0.9090666770935059
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,32767,0.9175946712493896
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,32767,0.26917866865793866
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,32767,0.9185120264689127
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,32767,0.2903733253479004
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,32767,0.9070293108622233
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,32767,0.5111413399378458
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,32767,0.5040160020192465
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,32767,0.8818773428599039
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,32767,0.8636053403218588
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,32767,0.8634239832560221
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,32767,0.5549120108286539
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,32767,0.4888906478881836
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,32767,0.8676373163859049
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,32767,0.4892266591389974
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,32767,0.8461546897888184
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,32767,0.5543946822484335
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,32767,0.8618079821268717
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,32767,0.4954880078633626
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,32767,0.4886346658070882
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,32767,0.849840005238851
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,32767,0.8404266834259033
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,32767,0.9503040313720703
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,32767,0.8712906837463379
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,32767,1.049562692642212
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,32767,0.9326720237731934
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,32767,0.9367520014444987
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,32767,0.8965066274007162
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,32767,0.9398612976074219
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,32767,0.8484640121459961
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,32767,0.8472053209940592
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,32767,0.9315786361694336
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,32767,0.9344213008880615
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,32767,0.8806026776631674
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,32767,3.3550774256388345
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,32767,0.8565386931101481
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,32767,0.9707039992014567
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,32767,0.9299039840698242
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,32767,1.8276000022888184
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,32767,1.7044320106506348
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,32767,1.6746400197347004
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,32767,1.7046079635620117
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,32767,1.667952060699463
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,32767,1.655392011006673
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,32767,1.6539146105448406
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,32767,1.630890687306722
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,32767,1.624000072479248
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,32767,1.6796159744262695
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,32767,1.6518294016520183
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,32767,1.6226827303568523
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,32767,1.6459253629048665
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,32767,1.6477707227071126
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,32767,1.6468586921691895
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,32767,1.6289386749267578
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,32767,3.328282674153646
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,32767,3.591989199320475
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,32767,3.3037118911743164
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,32767,3.2185118993123374
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,32767,3.2656428019205728
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,32767,3.256352106730143
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,32767,3.1760212580362954
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,32767,3.182330767313639
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,32767,3.24996280670166
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,32767,3.174037297566732
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,32767,3.2356907526652017
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,32767,3.1663840611775718
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,32767,3.244192123413086
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,32767,3.174549420674642
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,32767,3.2678133646647134
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,32767,3.1542027791341147
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,32767,6.350927988688151
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,32767,6.548197428385417
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,32767,7.221941630045573
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,32767,6.5753173828125
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,32767,6.4798024495442705
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,32767,6.4760481516520185
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,32767,6.301989237467448
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,32767,6.279045104980469
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,32767,6.422639846801758
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,65535,0.17469867070515951
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,65535,0.22321067253748575
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,65535,0.16551466782887778
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,65535,0.21568532784779867
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,65535,0.166703999042511
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,65535,0.20081599553426108
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,65535,0.579205314318339
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,65535,0.22778133551279703
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,65535,0.16641599933306375
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,32767,6.428266525268555
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,65535,0.2224959929784139
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,65535,0.19035732746124268
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,65535,0.3910293181737264
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,32767,6.310944239298503
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,65535,0.1643786629041036
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,65535,0.22426132361094156
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,65535,0.16806934277216592
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,65535,0.20363199710845947
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,32767,6.290682474772136
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,65535,0.17230933904647827
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,65535,0.16696532567342123
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,65535,0.23518933852513632
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,65535,0.1824693282445272
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,32767,6.328533172607422
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,65535,0.17169066270192465
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,65535,0.22291733821233115
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,65535,0.17047999302546182
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,65535,0.17316800355911255
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,65535,0.17088532447814941
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,65535,0.23443732659022012
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,65535,0.16517866651217142
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,65535,0.23057067394256592
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,65535,0.15983466307322183
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,65535,0.17762132485707602
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,32767,6.415760040283203
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,65535,0.16447466611862183
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,65535,0.20467199881871542
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,32767,6.242682774861653
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,65535,0.1824693282445272
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,65535,0.17861332496007284
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,65535,0.19106133778889975
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,65535,1.7677653630574544
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,65535,0.17587200800577799
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,65535,1.7642347017923992
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,65535,0.1768959959348043
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,65535,1.7764533360799153
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,65535,0.17308266957600912
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,65535,1.7270612716674805
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,65535,1.7772053082784016
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,65535,0.17603733142217
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,65535,0.1723840037981669
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,65535,1.7489546140034993
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,65535,1.7283573150634766
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,65535,1.7230614026387532
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,65535,0.2865813374519348
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,65535,0.30932267506917316
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,65535,0.27820799748102826
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,65535,1.790287971496582
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,65535,1.7776106198628743
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,65535,0.27327466011047363
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,65535,1.7397066752115886
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,65535,0.3014986713727315
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,65535,1.797690709431966
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,65535,0.2677866617838542
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,65535,1.7607733408610027
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,65535,1.7437386512756348
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,65535,0.273199995358785
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,65535,1.732319990793864
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,32767,6.431711832682292
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,65535,0.2709866762161255
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,65535,1.726688067118327
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,65535,0.5416320164998373
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,65535,0.5709013144175211
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,65535,1.778981367746989
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,65535,0.4912213484446208
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,65535,1.7554292678833008
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,65535,0.49743465582529706
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,65535,1.7484374046325684
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,65535,0.5572959979375204
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,65535,1.729765256245931
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,65535,0.48865067958831787
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,65535,1.761247952779134
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,65535,1.6870293617248535
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,65535,0.5018666585286459
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,65535,1.7616960207621257
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,65535,0.48677865664164227
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,65535,1.7837600708007812
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,65535,0.9473439852396647
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,65535,1.645642598470052
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,65535,1.69978666305542
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,65535,0.9662880102793375
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,65535,1.6849120457967122
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,65535,0.9475573698679606
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,65535,0.9363733132680258
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,65535,1.6406399408976238
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,65535,0.9408906300862631
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,65535,0.9338293075561523
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,65535,1.6406559944152832
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,65535,1.5938453674316406
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,65535,1.6471254030863445
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,65535,1.7033066749572754
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,65535,1.314464012781779
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,65535,1.6273706754048665
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,65535,1.790453275044759
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,65535,1.6799413363138835
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,65535,1.8178772926330566
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,65535,1.6287892659505208
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,65535,1.8597013155619304
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,65535,1.6186347007751465
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,65535,1.8117173512776692
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,65535,1.6232159932454426
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,65535,1.8079840342203777
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,65535,1.6196853319803874
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,65535,1.6341172854105632
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,65535,1.795514742533366
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,65535,2.078154722849528
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,65535,1.6071467399597168
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,65535,3.5416905085245767
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,65535,3.255210558573405
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,65535,1.8399465878804524
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,65535,1.6200532913208008
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,65535,3.263258616129557
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,65535,3.2073494593302407
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,65535,3.263530731201172
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,65535,3.245840072631836
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,65535,3.194352149963379
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,65535,3.2496747970581055
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,65535,3.1816479365030923
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,65535,3.2485386530558267
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,65535,3.13865598042806
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,65535,3.234224001566569
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,65535,3.237290700276693
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,65535,3.144378662109375
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,65535,3.153717358907064
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,65535,3.28385066986084
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,65535,7.017354965209961
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,65535,6.385413487752278
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,65535,6.363610585530599
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,65535,6.479509353637695
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,65535,6.253381093343099
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,65535,6.220213572184245
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,65535,6.470528284708659
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,65535,6.4288584391276045
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,65535,6.434773127237956
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,131071,0.19053866465886435
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,65535,6.446159998575847
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,131071,0.1764799952507019
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,131071,0.22182399034500122
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,131071,0.2166986664136251
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,131071,0.21923200289408365
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,131071,0.17931199073791504
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,131071,0.17273066441218057
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,131071,1.896176020304362
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,131071,0.16762133439381918
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,65535,6.244453430175781
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,131071,0.23712533712387085
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,131071,0.1736853321393331
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,131071,0.17058134078979492
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,65535,6.237663904825847
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,131071,0.2230986754099528
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,131071,0.18112534284591675
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,131071,0.4322293202082316
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,131071,0.17661333084106445
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,131071,0.20697067181269327
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,131071,0.20277865727742514
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,131071,3.5209172566731772
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,131071,0.17942933241526285
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,131071,3.4564746220906577
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,65535,6.288543701171875
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,131071,0.6877226829528809
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,65535,6.408938725789388
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,131071,0.18625599145889282
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,131071,3.47267214457194
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,131071,0.19728533426920572
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,131071,3.4591894149780273
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,65535,6.292816162109375
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,131071,3.3462934494018555
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,131071,0.1824000080426534
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,131071,0.18508267402648926
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,131071,3.3890132904052734
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,131071,0.29363733530044556
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,65535,6.413424173990886
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,131071,3.431669235229492
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,131071,0.2860906720161438
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,131071,3.366901397705078
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,131071,0.3198773264884949
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,131071,3.360560099283854
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,131071,0.3189013401667277
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,131071,0.2767680088678996
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,131071,3.50602118174235
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,131071,3.4512853622436523
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,131071,0.2732693354288737
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,131071,3.4007412592569985
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,131071,0.2752373417218526
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,131071,3.320730527242025
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,131071,0.28016000986099243
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,131071,3.376240094502767
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,131071,3.379605293273926
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,131071,3.249050776163737
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,131071,0.5013546546300253
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,131071,0.49750399589538574
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,131071,0.4896320104598999
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,131071,0.48902400334676105
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,131071,3.380730628967285
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,131071,3.366570790608724
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,131071,0.5536799828211466
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,131071,3.4039306640625
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,131071,0.49092264970143634
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,131071,3.3822720845540366
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,131071,0.509557326634725
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,131071,3.323472023010254
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,131071,0.48625067869822186
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,131071,3.339594523111979
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,131071,3.363295873006185
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,131071,3.3038508097330728
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,131071,0.938922643661499
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,131071,3.53275203704834
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,131071,0.9392480055491129
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,131071,1.0776639779408772
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,131071,3.4403254191080728
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,131071,3.515413284301758
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,131071,3.479861259460449
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,131071,0.9407626787821451
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,131071,0.9261333147684733
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,131071,3.480047861735026
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,131071,0.9392266273498535
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,131071,0.9768853187561035
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,131071,3.430074691772461
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,131071,3.463242530822754
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,131071,0.9350346724192301
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,131071,3.309424082438151
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,131071,3.1965440114339194
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,131071,1.82424529393514
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,131071,3.1613601048787436
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,131071,1.792202631632487
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,131071,1.8206772804260254
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,131071,3.2106825510660806
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,131071,1.848149299621582
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,131071,3.1783787409464517
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,131071,1.8033973375956218
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,131071,1.8110186258951824
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,131071,3.2047414779663086
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,131071,1.8182934125264485
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,131071,3.1852585474650064
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,131071,3.1994078954060874
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,131071,1.8201707204182942
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,131071,3.1826292673746743
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,131071,3.2304159800211587
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,131071,3.485600153605143
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,131071,3.667365392049154
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,131071,3.1894346872965493
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,131071,3.1910826365152993
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,131071,3.6686185201009116
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,131071,3.658735911051432
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,131071,3.174976030985514
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,131071,4.089093208312988
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,131071,3.154693285624186
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,131071,3.586666742960612
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,131071,3.162800153096517
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,131071,3.160271962483724
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,131071,4.088650703430176
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,131071,3.553370793660482
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,131071,3.138703982035319
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,131071,6.924869537353516
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,131071,6.30681037902832
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,131071,6.461941401163737
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,131071,6.760271708170573
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,131071,6.448127746582031
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,131071,6.309872309366862
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,131071,6.251770655314128
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,131071,6.4567413330078125
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,131071,6.464256286621094
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,131071,6.226117451985677
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,131071,6.222954432169597
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,131071,6.436250686645508
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,131071,6.430255889892578
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,131071,6.236186981201172
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,131071,6.213205337524414
VLLM,0.12.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,131071,6.4177602132161455
