framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,1,0.10019200046857198
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,1,0.09636800487836202
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,1,0.2160053253173828
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,1,0.15611199537913004
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,1,0.35973866780598956
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,1,0.15220800042152405
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,1,0.21010132630666098
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,1,0.1020853320757548
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,1,0.12743467092514038
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,1,0.1613653302192688
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,1,0.14686399698257446
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,1,0.17876799901326498
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,1,0.12222400307655334
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,1,0.10316800077756245
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,1,0.1590826710065206
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,1,0.14312000075976053
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,1,0.10178666313489278
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,1,0.2137920061747233
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,1,0.10426666339238484
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,1,0.10207466284434001
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,1,0.21130132675170898
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,1,0.14512532949447632
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,1,0.16142400105794272
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,1,0.15246400237083435
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,1,0.15251200397809347
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,1,0.21915733814239502
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,1,0.155541330575943
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,1,0.1889866590499878
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,1,0.12568533420562744
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,1,0.16269866625467935
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,1,0.22780799865722656
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,1,0.15960533420244852
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,1,0.15661866466204324
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,1,0.21532267332077026
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,1,0.12615999579429626
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,1,0.15939733386039734
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,1,0.13829333583513895
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,1,0.21157866716384888
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,1,0.12304000059763591
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,1,0.162063995997111
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,1,0.22444266080856323
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,1,0.14595199624697366
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,1,0.1734559933344523
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,1,0.22406399250030518
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,1,0.20777599016825357
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,1,0.16200000047683716
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,1,0.16015467047691345
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,1,0.2276853322982788
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,1,0.2193333307902018
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,1,0.16553599635759988
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,1,0.15923733512560526
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,1,0.2274186611175537
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,1,0.13615467151006064
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,1,0.12436266740163167
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,1,0.21337066094080606
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,1,0.19774399201075235
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,1,0.14337066809336343
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,1,0.2384799917538961
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,1,0.22921599944432577
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,1,0.1605280041694641
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,1,0.16080000003178915
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,1,0.1986080010732015
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,1,0.1613653302192688
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,1,0.2262773315111796
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,1,0.16129066546758017
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,1,0.430517315864563
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,1,0.16154133280118307
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,1,0.1369439959526062
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,1,0.1458506683508555
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,1,0.2374026576677958
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,1,0.16014400124549866
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,1,0.13152533769607544
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,1,0.2198186715443929
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,1,0.16850666205088297
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,1,0.1835199991861979
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,1,0.15808000167210898
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,1,0.12548266847928366
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,1,0.15622400244077048
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,1,0.23375999927520752
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,1,0.20777599016825357
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,1,0.22176533937454224
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,1,0.3625386555989583
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,1,0.16201066970825195
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,1,0.15850667158762613
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,1,0.12544533610343933
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,1,0.14667733510335287
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,1,0.22138667106628418
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,1,0.15211199720700583
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,1,0.12013333042462666
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,1,0.16289066274960837
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,1,0.22630399465560913
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,1,0.2275893290837606
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,1,0.15824000040690103
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,1,0.47014399369557697
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,1,0.6635306676228842
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,1,0.17861332496007284
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,1,0.2265226642290751
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,1,0.16148266196250916
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,1,0.23662932713826498
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,1,0.15873600045839945
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,1,1.043061335881551
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,1,0.2216213345527649
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,1,0.16107733050982156
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,1,0.23391467332839966
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,1,0.1236799955368042
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,1,0.17754666010538736
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,1,0.12582932909329733
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,1,0.1583199997742971
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,1,0.2271733283996582
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,1,0.2302239934603373
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,1,0.1220266620318095
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,1,0.19941866397857666
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,1,0.3615146478017171
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,1,0.16531200210253397
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,1,0.3395040035247803
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,1,0.25600000222524005
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,1,0.18493332465489706
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,1,0.10218666990598042
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,1,0.09657067060470581
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,1,0.1548426647981008
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,1,0.1586720049381256
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,1,0.12237866719563802
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,1,0.15440000096956888
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,1,0.20177600781122842
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,1,0.15987199544906616
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,1,0.15548800428708395
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,1,0.22920533021291098
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,1,0.20503999789555868
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,1,0.17043733596801758
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,1,0.23452800512313843
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,1,0.16222400466601053
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,1,0.29418132702509564
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,1,0.1515679955482483
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,1,0.18180267016092935
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,1,0.13801599542299905
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,1,0.18290666739145914
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,1,0.17197867234547934
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,1,0.12357333302497864
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,1,0.11640533804893494
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,1,0.15572800238927206
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,1,0.14392000436782837
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,1,0.1949546734491984
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,1,0.16007467110951742
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,1,0.1562026639779409
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,1,0.421125332514445
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,1,0.20297600825627646
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,1,0.30953067541122437
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,1,0.2200266718864441
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,1,0.15965333580970764
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,1,0.25359465678532916
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,1,0.10452266534169515
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,1,0.1713013251622518
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,1,0.13456533352533975
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,1,0.19619733095169067
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,1,0.39193065961201984
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,1,0.16180266936620077
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,1,0.3062826593716939
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,1,0.1881386637687683
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,1,0.13134933511416116
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,1,0.29254400730133057
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,1,0.2711946765581767
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,1,0.16261333227157593
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,1,0.4829440116882324
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,1,0.31303999821345013
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,1,0.10637332995732625
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,1,0.7897013028462728
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,1,0.11361066500345866
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,1,0.22438400983810425
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,1,0.13448533415794373
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,3,0.0939573347568512
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,1,0.20546134312947592
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,1,0.16432533661524454
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,3,0.16614933808644614
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,1,0.16966400543848673
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,3,0.15653333067893982
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,1,0.16826667388280234
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,1,0.23745600382486978
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,3,0.20790932575861612
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,3,0.11569600303967793
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,3,0.17443732420603433
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,3,0.14275733629862467
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,3,0.2124639948209127
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,1,0.1938719948132833
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,3,0.20156800746917725
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,1,0.18014399210611978
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,3,0.15532799561818442
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,3,0.14808000127474466
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,3,0.15797866384188333
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,3,0.20992000897725424
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,3,0.16900799671808878
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,3,0.11777066191037495
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,3,0.1959893306096395
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,3,0.15370133519172668
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,3,0.15759467085202536
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,3,0.2184106707572937
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,3,0.22497600317001343
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,3,0.1591146687666575
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,3,0.2216800053914388
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,3,0.22441067298253378
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,3,0.15822933117548624
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,3,0.15332266688346863
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,3,0.15319466590881348
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,3,0.22828267018000284
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,3,0.2244960069656372
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,3,0.1590826710065206
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,3,0.16488533218701681
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,3,0.1866719921429952
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,3,0.34330666065216064
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,3,0.16381866733233133
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,3,0.22537066539128622
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,3,0.2245546579360962
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,3,0.1625386675198873
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,3,0.2311519980430603
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,3,0.22908800840377808
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,3,0.1552906632423401
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,3,0.16378133495648703
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,3,0.14506666858990988
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,3,0.2132586638132731
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,3,0.13333866993586221
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,3,0.15678399801254272
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,3,0.22608532508214316
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,3,0.2185493310292562
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,3,1.009925365447998
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,3,1.5988213221232097
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,3,0.20182400941848755
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,3,0.2355519930521647
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,3,0.7326613267262777
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,3,0.16196266810099283
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,3,0.12190933028856914
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,3,0.191210667292277
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,3,0.1662346621354421
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,3,0.15566399693489075
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,3,0.22345600525538126
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,3,0.23382399479548135
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,3,0.16090133786201477
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,3,0.23359467585881552
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,3,0.2304640014966329
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,3,0.3152533372243245
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,3,0.1662720044453939
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,3,0.2071146567662557
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,3,0.17339199781417847
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,3,0.2287999987602234
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,3,0.12112533052762349
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,3,0.22195732593536377
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,3,0.16240533192952475
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,3,0.16057599584261575
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,3,0.21839465697606406
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,3,0.22899732987085977
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,3,0.16100266575813293
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,3,0.1774453322092692
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,3,0.1543359955151876
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,3,0.22536534070968628
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,3,0.1604159971078237
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,3,0.22639467318852743
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,3,0.20472532510757446
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,3,0.10328533252080281
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,3,0.1683839956919352
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,3,0.15990400314331055
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,3,0.22023999691009521
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,3,0.15853866934776306
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,3,0.2283680041631063
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,3,0.22437334060668945
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,3,0.21994133790334067
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,3,0.15929599603017172
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,3,0.15291733543078104
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,3,0.22968000173568726
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,3,0.1477013329664866
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,3,0.17552000284194946
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,3,0.2199946641921997
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,3,0.1637333333492279
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,3,0.16531200210253397
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,3,0.2004959980646769
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,3,0.2201333244641622
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,3,0.16357866923014322
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,3,0.16064000129699707
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,3,0.15377066532770792
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,3,0.15331199765205383
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,3,0.24888533353805542
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,3,0.15983999768892923
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,3,0.22485333681106567
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,3,0.22923199335734049
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,3,0.22269866863886514
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,3,0.15452800194422403
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,3,0.21024000644683838
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,3,0.15676800409952799
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,3,0.2162719964981079
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,3,0.15547200043996176
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,3,0.414522647857666
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,3,0.15611199537913004
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,3,0.22805867592493692
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,3,0.16025066375732422
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,3,0.16103466351826987
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,3,0.22173333168029785
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,3,0.15703999996185303
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,3,0.2261013388633728
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,3,0.2225333253542582
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,3,0.15654399991035461
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,3,0.15498666961987814
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,3,0.18341867129007974
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,3,0.15436266859372458
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,3,0.21740265687306723
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,3,0.22945600748062134
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,3,0.160480002562205
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,3,0.1993280053138733
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,3,0.12493333220481873
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,3,0.17076265811920166
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,3,0.1967786749204
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,3,0.2347360054651896
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,3,0.14433067043622336
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,3,0.16873600085576376
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,3,0.15916267037391663
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,3,0.2078346610069275
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,3,0.1236799955368042
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,3,0.14986667037010193
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,3,0.20219733317693075
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,3,0.22721066077550253
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,3,0.15084266662597656
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,3,0.2280693252881368
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,3,0.15197333693504333
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,3,0.18067733446756998
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,3,0.7645920117696127
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,3,0.17982399463653564
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,3,0.16342932979265848
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,3,0.4213333527247111
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,3,0.25490667422612506
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,3,0.16383999586105347
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,3,0.12736533085505167
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,3,0.20866666237513223
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,3,0.1508746643861135
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,3,0.1650879979133606
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,3,0.13081600268681845
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,3,0.16375466187795004
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,3,0.22259199619293213
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,3,0.22542933622996011
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,3,0.12291733423868816
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,3,0.1944426695505778
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,3,0.27588266134262085
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,3,0.2546986738840739
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,3,0.7949386437733968
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,3,0.154341330130895
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,3,0.48106133937835693
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,3,0.30552534262339276
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,3,0.1283573309580485
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,3,0.22830400864283243
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,3,0.1493226687113444
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,3,0.18984532356262207
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,3,0.12516799569129944
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,7,0.1532799998919169
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,3,0.22985066970189413
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,7,0.22234133879343668
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,7,0.15426133076349893
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,3,0.1625706652800242
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,7,0.2071359952290853
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,3,0.24050132433573404
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,7,0.1573973298072815
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,7,0.21024000644683838
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,3,0.16075733304023743
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,7,0.15365866820017496
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,7,0.15967466433842978
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,7,0.20542399088541666
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,7,0.2136639952659607
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,3,0.21071465810139975
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,7,0.16157866517702738
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,7,0.15897599856058756
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,7,0.21337066094080606
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,7,0.20801599820454916
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,7,0.1545919974644979
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,7,0.19273066520690918
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,7,0.15711466471354166
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,7,0.20029334227244058
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,7,0.16185599565505981
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,7,0.1606826682885488
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,7,0.12921599547068277
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,7,0.2083146572113037
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,7,0.21780800819396973
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,7,0.1556053360303243
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,7,1.1541706720987956
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,7,0.21938133239746094
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,7,0.15497066577275595
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,7,0.15402133266131082
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,7,0.23413334290186563
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,7,0.1264746685822805
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,7,0.21346133947372437
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,7,1.1894400119781494
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,7,0.149317334095637
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,7,0.9001920223236084
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,7,0.18421334028244019
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,7,0.15780799587567648
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,7,0.15316800276438394
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,7,0.16029333074887595
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,7,0.22311466932296753
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,7,0.22259199619293213
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,7,0.15518933534622192
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,7,0.21818133195241293
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,7,0.16107199589411417
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,7,0.22830400864283243
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,7,0.15737066666285196
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,7,0.15771200259526572
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,7,0.16454399625460306
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,7,0.20233599344889322
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,7,0.5388533274332682
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,7,0.7935360272725424
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,7,0.2165493369102478
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,7,0.25642667214075726
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,7,0.16331199804941812
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,7,0.22267733017603555
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,7,0.15551466743151346
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,7,0.1775253415107727
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,7,0.15310933192571005
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,7,0.22275733947753906
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,7,0.13884266217549643
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,7,0.16062933206558228
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,7,0.20388799905776978
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,7,0.22447466850280762
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,7,0.1557919979095459
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,7,0.197978675365448
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,7,0.16954666376113892
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,7,0.12519466876983643
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,7,0.2315359910329183
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,7,0.2240053415298462
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,7,0.22424532969792685
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,7,0.16182933251063028
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,7,0.15652799606323242
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,7,0.22285334269205728
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,7,0.15347199638684592
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,7,0.21733333667119345
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,7,0.15713600317637125
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,7,0.16402666767438254
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,7,0.14860799908638
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,7,0.2041013240814209
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,7,0.15826132893562317
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,7,0.1639306644598643
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,7,0.1622880001862844
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,7,0.15877866744995117
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,7,0.2161173423131307
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,7,0.1578933298587799
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,7,0.22780267397562662
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,7,0.17990932861963907
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,7,0.09965866804122925
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,7,0.18386665980021158
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,7,0.1644533375898997
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,7,0.2272640069325765
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,7,0.15569600462913513
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,7,0.1597599983215332
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,7,0.22990934054056802
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,7,0.2227733333905538
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,7,0.16595199704170227
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,7,0.19180800517400107
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,7,0.15411200126012167
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,7,0.2225386699040731
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,7,0.16330132881800333
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,7,0.2220053275426229
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,7,0.2233440081278483
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,7,0.15908799568812051
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,7,0.12292266885439555
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,7,0.15597333510716757
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,7,0.1998400092124939
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,7,0.21637332439422607
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,7,0.15558399756749472
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,7,0.22673600912094116
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,7,0.1569919983545939
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,7,0.6385066509246826
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,7,0.15875200430552164
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,7,0.19891732931137085
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,7,0.16033599774042764
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,7,1.1187787055969238
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,7,0.15507200360298157
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,7,0.22816532850265503
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,7,0.2281279961268107
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,7,0.13894400000572205
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,7,0.15781333049138388
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,7,0.1889280080795288
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,7,0.12024000287055969
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,7,0.21858133872350058
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,7,0.16225600242614746
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,7,0.2246560057004293
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,7,0.16266133387883505
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,7,0.16513599952061972
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,7,0.1664426624774933
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,7,0.1991680065790812
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,7,0.12468799948692322
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,7,0.1675999959309896
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,7,0.23375999927520752
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,7,0.3047093351682027
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,7,0.16512533028920492
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,7,0.21034133434295654
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,7,0.1565546691417694
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,7,0.22062933444976807
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,7,0.13406933347384134
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,7,0.1653600037097931
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,7,0.16412267088890076
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,7,0.15786666671435037
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,7,0.2071839968363444
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,7,0.15657066305478415
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,7,0.15662933389345804
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,7,0.20706133047739664
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,7,0.31082133452097577
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,7,0.4198026657104492
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,7,0.2505226731300354
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,7,0.13523200154304504
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,7,0.17855999867121378
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,7,0.1568106710910797
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,7,0.1779200037320455
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,7,0.17811733484268188
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,7,0.12272533774375916
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,7,0.15407466888427734
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,7,0.2186773419380188
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,7,0.21979200839996338
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,7,0.15476800004641214
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,7,0.15993600090344748
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,7,0.22162665923436484
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,7,0.16179200013478598
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,7,0.27341334025065106
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,7,0.7917493184407552
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,7,0.47541332244873047
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,7,0.17479999860127768
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,7,0.17290133237838745
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,7,0.30484267075856525
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,7,0.1274666686852773
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,7,0.16492266456286112
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,7,0.22953067223230997
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,7,0.16155733664830527
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,7,0.23432000478108725
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,7,0.16744534174601236
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,7,0.22799466053644815
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,7,0.2272640069325765
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,15,0.15228799978892008
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,15,0.1548906664053599
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,15,0.21362666289011636
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,15,1.1589279969533284
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,15,0.15633599956830344
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,15,0.3892373243967692
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,7,0.13035733501116434
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,15,0.11613333225250244
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,15,0.1553866664568583
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,15,0.20957867304484049
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,15,0.1739679972330729
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,15,0.21322667598724365
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,15,0.12658666570981345
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,15,0.15376533071200052
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,15,0.15501333276430765
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,15,0.5233706633249918
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,15,0.16219733158747354
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,15,0.1135093371073405
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,15,0.7681492964426676
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,15,0.2173866629600525
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,15,0.3158133427302043
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,15,0.16620799899101257
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,15,0.2158506711324056
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,15,0.15636266271273294
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,15,0.2164213260014852
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,7,0.20795732736587524
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,15,0.1553759972254435
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,15,0.2293813427289327
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,15,0.15172800421714783
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,15,0.22377065817515054
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,15,0.15429866313934326
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,15,0.2199946641921997
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,15,0.14677866299947104
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,15,0.19986132780710855
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,15,0.16040533781051636
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,15,0.22709333896636963
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,15,0.1541866660118103
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,15,0.21432000398635864
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,15,0.22581332921981812
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,15,0.3481653531392415
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,15,0.16210666298866272
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,15,0.24407466252644858
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,15,0.2103360096613566
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,15,0.14736533164978027
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,15,0.17418134212493896
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,15,0.1567039986451467
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,15,0.21606399615605673
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,15,0.22756266593933105
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,15,0.19954667488733926
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,15,0.35042134920756024
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,15,0.17709332704544067
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,15,0.1574613352616628
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,15,0.1172106663386027
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,15,0.2166879971822103
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,15,0.11011733611424764
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,15,0.41843732198079425
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,15,0.22265599171320596
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,15,0.15873066584269205
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,15,0.15617600083351135
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,15,0.22762133677800497
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,15,0.12031466762224834
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,15,0.23674132426579794
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,15,0.15970666209856668
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,15,0.2262986699740092
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,15,0.15901333093643188
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,15,0.20750399430592856
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,15,0.15518933534622192
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,15,0.22360533475875854
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,15,0.215338667233785
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,15,0.10976533095041911
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,15,0.2325813372929891
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,15,0.15450666348139444
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,15,0.13913066188494363
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,15,0.16909333070119223
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,15,0.6259680191675822
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,15,0.21813867489496866
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,15,0.17314134041468301
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,15,0.3370826641718547
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,15,0.16552000244458517
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,15,0.19527467091878256
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,15,0.13172266880671182
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,15,0.22161600987116495
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,15,0.15627732872962952
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,15,0.8715519905090332
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,15,0.22004266579945883
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,15,0.15639467040697733
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,15,0.2204479972521464
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,15,0.15609600146611533
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,15,0.1630293329556783
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,15,0.1527466674645742
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,15,0.21959465742111206
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,15,0.17195733388264975
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,15,0.16132799784342447
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,15,0.24870399634043375
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,15,0.2304853399594625
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,15,0.15893333156903586
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,15,0.16290666659673056
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,15,0.19906665881474814
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,15,0.15959999958674112
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,15,0.15215466419855753
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,15,0.1992853283882141
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,15,0.4519253174463908
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,15,0.12775466839472452
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,15,0.2273706595102946
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,15,0.16057599584261575
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,15,0.2830079992612203
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,15,0.12256532907485962
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,15,0.21237866083780924
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,15,0.23203200101852417
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,15,0.22354666392008463
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,15,0.1678346594174703
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,15,0.2307093342145284
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,15,0.20297066370646158
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,15,0.12657599647839865
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,15,0.21954133113225302
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,15,0.15495466192563376
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,15,0.22773333390553793
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,15,0.1646666626135508
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,15,0.16806399822235107
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,15,0.22105600436528525
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,15,0.12621866663297018
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,15,0.1556106706460317
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,15,0.1548640032609304
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,15,0.2228320042292277
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,15,0.21901865800221762
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,15,0.21755200624465942
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,15,0.22604266802469888
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,15,0.15169599652290344
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,15,0.11866666873296101
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,15,0.15702933073043823
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,15,0.1775146722793579
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,15,0.23577600717544556
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,15,0.18678933382034302
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,15,0.1451573371887207
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,15,0.15770133336385092
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,15,0.21618133783340454
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,15,0.2249280015627543
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,15,0.12215466300646464
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,15,0.15660799543062845
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,15,0.2232159972190857
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,15,0.16028799613316855
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,15,0.15712533394495645
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,15,0.22380266586939493
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,15,0.2382133404413859
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,15,0.15292267004648843
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,15,0.19251734018325806
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,15,0.41391468048095703
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,15,0.30505599578221637
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,15,0.2507733305295308
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,15,0.17719467480977377
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,15,0.16995733976364136
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,15,0.21686933437983194
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,15,0.274453341960907
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,15,0.17261866728464761
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,15,0.1567466656366984
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,15,0.22176533937454224
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,15,0.15634666879971823
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,15,0.22447466850280762
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,15,0.1525759994983673
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,15,0.2257279952367147
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,15,0.19675199190775552
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,15,0.15548800428708395
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,15,0.2727839946746826
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,15,0.47041066487630206
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,15,0.16451733311017355
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,15,0.1679733395576477
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,15,0.208186666170756
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,15,0.7796106338500977
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,15,0.29573333263397217
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,15,0.2302346626917521
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,15,0.15833066900571188
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,15,0.1834719975789388
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,15,0.15825066963831583
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,31,0.1532533367474874
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,15,0.22463999191919962
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,15,0.16445866227149963
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,31,0.15874133507410684
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,31,0.20568533738454184
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,15,0.1673226753870646
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,15,0.22430400053660074
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,15,0.2053920030593872
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,31,0.1601599951585134
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,31,0.21024533112843832
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,31,0.20703466733296713
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,31,0.11853866775830586
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,31,0.20936532815297446
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,31,0.1553866664568583
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,31,0.16011200348536173
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,31,0.21758399407068887
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,31,0.6997120380401611
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,31,0.15662933389345804
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,31,0.20627733071645102
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,31,1.0988000233968098
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,31,0.1986666719118754
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,31,0.15811733404795328
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,31,0.15591999888420105
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,31,0.21584532658259073
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,31,0.16270933548609415
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,31,0.2855626742045085
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,31,0.15562666455904642
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,31,0.2216800053914388
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,31,0.21688532829284668
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,31,0.15801599621772766
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,31,0.21045867602030435
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,31,0.2216320037841797
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,31,0.29179733991622925
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,31,0.1595253348350525
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,31,0.4773706595102946
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,31,0.12043733398119609
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,31,0.21106666326522827
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,31,0.46612266699473065
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,31,0.22510933876037598
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,31,0.15837333599726358
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,31,0.15503999590873718
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,31,0.15960533420244852
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,31,0.23067200183868408
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,31,0.19536532958348593
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,31,0.22286933660507202
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,31,0.2207733392715454
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,31,0.15544000267982483
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,31,0.15716266632080078
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,31,0.1550933321317037
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,31,0.18526933590571085
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,31,0.1557973325252533
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,31,0.22936532894770303
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,31,0.15307733416557312
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,31,1.1531999905904133
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,31,0.16914665699005127
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,31,1.6301813125610352
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,31,0.17883199453353882
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,31,0.16747732957204184
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,31,0.22267200549443564
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,31,0.15942933162053427
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,31,0.13330666224161783
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,31,0.7092320124308268
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,31,0.22915732860565186
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,31,0.1572160025437673
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,31,0.22292266289393106
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,31,0.1673120061556498
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,31,0.21556266148885092
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,31,0.20478399594624838
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,31,0.14707199732462564
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,31,0.15653333067893982
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,31,0.15677332878112793
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,31,0.18337599436442056
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,31,0.17103999853134155
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,31,0.2262079914410909
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,31,0.22315200169881186
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,31,0.15398933490117392
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,31,0.15851733088493347
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,31,0.1641706625620524
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,31,0.2213546633720398
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,31,0.22697067260742188
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,31,0.1453546682993571
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,31,0.15450132886568704
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,31,0.22024534145991007
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,31,0.15892799695332846
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,31,0.20200000206629434
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,31,0.15779200196266174
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,31,0.1558986703554789
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,31,0.9109706878662109
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,31,0.2260106603304545
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,31,0.15288533767064413
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,31,0.1630880037943522
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,31,0.22126932938893637
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,31,0.22272000710169473
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,31,0.15507733821868896
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,31,0.16639999548594156
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,31,0.21754133701324463
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,31,0.22142932812372842
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,31,0.16715200742085776
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,31,0.16011200348536173
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,31,0.22169599930445352
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,31,0.9289279778798422
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,31,0.14847466349601746
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,31,0.0944053332010905
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,31,0.22477332750956217
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,31,0.1933493415514628
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,31,0.22116265694300333
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,31,0.13434666395187378
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,31,0.15412267049153647
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,31,0.1773866613705953
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,31,0.22574400901794434
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,31,0.7332906723022461
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,31,0.16049599647521973
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,31,2.176016012827555
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,31,0.21788267294565836
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,31,0.2216213345527649
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,31,0.13186132907867432
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,31,0.200981338818868
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,31,0.2203786571820577
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,31,0.21930134296417236
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,31,0.15598932902018228
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,31,0.15474667151769003
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,31,0.22056533892949423
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,31,0.1593653361002604
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,31,0.16686399777730307
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,31,0.22452799479166666
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,31,0.15345600247383118
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,31,0.21869333585103354
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,31,0.1546346644560496
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,31,0.2234506607055664
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,31,0.16195199886957803
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,31,0.15243732929229736
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,31,0.2257066567738851
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,31,0.1991893251736959
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,31,0.14853333433469137
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,31,0.24219733476638794
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,31,0.16824533541997275
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,31,0.22404799858729044
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,31,0.15615999698638916
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,31,0.18856000900268555
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,31,0.12585600217183432
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,31,0.16259732842445374
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,31,0.16101333498954773
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,31,0.2304746707280477
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,31,0.2217866579691569
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,31,0.22272000710169473
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,31,0.20084265867869058
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,31,0.15843199690183005
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,31,0.13637333114941916
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,31,0.2028533418973287
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,31,0.3094453414281209
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,31,0.2464159925778707
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,31,0.42182401816050213
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,31,0.18130133549372354
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,31,0.12852799892425537
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,31,0.16260799765586853
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,31,0.222271998723348
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,31,0.23355732361475626
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,31,0.15425599614779154
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,31,0.22266666094462076
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,31,0.1255626678466797
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,31,0.18790932496388754
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,31,0.1197706659634908
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,31,0.228277325630188
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,31,0.12236266334851582
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,31,0.20390933752059937
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,31,0.27643199761708576
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,31,0.7810186545054117
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,31,0.29286932945251465
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,31,0.17305066188176474
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,31,0.46091731389363605
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,31,0.15996799866358438
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,31,0.17161067326863608
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,31,0.23572800556818643
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,31,0.15994133551915488
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,63,0.15079466501871744
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,31,0.22748800118764242
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,31,0.21638933817545572
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,31,0.4779520034790039
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,31,0.16296533743540445
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,31,0.22791467110315958
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,63,0.1545973320802053
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,63,0.21389333407084146
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,31,0.16040000319480896
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,63,0.16450666387875876
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,63,1.145370642344157
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,31,0.21228800217310587
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,63,0.20760534207026163
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,63,0.15686399737993875
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,63,0.12132267157236735
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,63,0.18948266903559366
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,63,0.1932906707127889
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,63,0.20634132623672485
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,63,0.15804800391197205
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,63,0.7912000020345052
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,63,0.216922660668691
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,63,0.19757866859436035
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,63,0.1622880001862844
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,63,0.16116799910863241
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,63,0.2221119999885559
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,63,0.1585493286450704
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,63,0.16290666659673056
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,63,0.21124267578125
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,63,0.22288000583648682
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,63,0.15544000267982483
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,63,0.2237066626548767
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,63,0.1586026648680369
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,63,0.21580266952514648
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,63,0.15785066286722818
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,63,0.22049067417780557
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,63,0.14829333623250326
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,63,0.19904534022013345
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,63,0.15619200468063354
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,63,0.22294400135676065
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,63,0.1455573340257009
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,63,0.09909866253534953
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,63,0.1436746617158254
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,63,0.17972799142201742
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,63,0.11781866351763408
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,63,0.6487253506978353
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,63,0.21342933177947998
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,63,0.15546666582425436
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,63,0.12126933534940083
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,63,0.22145599126815796
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,63,0.15847999850908914
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,63,0.16190399726231894
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,63,0.21217066049575806
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,63,0.2670346697171529
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,63,0.26053865750630695
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,63,0.15017066399256387
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,63,0.15690132975578308
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,63,0.2176106572151184
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,63,0.1929653286933899
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,63,1.1703786849975586
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,63,0.1573919951915741
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,63,0.21784534056981406
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,63,0.15576000014940897
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,63,0.22359466552734375
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,63,0.16390933593114218
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,63,0.15385599931081137
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,63,1.1994346777598064
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,63,0.8206026554107666
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,63,1.6713172594706218
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,63,0.7479946613311768
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,63,0.19801066319147745
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,63,0.22205867369969687
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,63,0.154341330130895
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,63,0.1781760056813558
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,63,0.16124799847602844
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,63,0.23034133513768515
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,63,0.16245333353678384
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,63,0.2235893408457438
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,63,0.21397866805394491
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,63,0.23150932788848877
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,63,0.15731733043988547
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,63,0.11782933274904887
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,63,0.17449599504470825
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,63,0.19337600469589233
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,63,0.12006400028864543
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,63,0.15759467085202536
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,63,0.22856533527374268
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,63,0.3768479824066162
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,63,0.2182240088780721
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,63,0.1595253348350525
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,63,0.21913599967956543
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,63,0.38069331645965576
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,63,0.14756266276041666
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,63,0.1609386702378591
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,63,0.21815999348958334
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,63,0.2200266718864441
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,63,0.15613333384195963
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,63,0.24849599599838257
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,63,0.1588533322016398
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,63,0.21613866090774536
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,63,0.15276267131169638
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,63,0.15763200322786966
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,63,1.1999093691507976
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,63,0.21189866463343301
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,63,0.1534346640110016
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,63,0.9572587013244629
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,63,0.7952000300089518
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,63,0.3086346586545308
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,63,0.1702293356259664
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,63,0.22883200645446777
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,63,0.15165332953135172
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,63,0.21964265902837118
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,63,0.1532906691233317
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,63,0.22870934009552002
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,63,0.15550399820009866
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,63,0.2241706649462382
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,63,1.151157299677531
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,63,0.15454933047294617
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,63,0.16529599825541177
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,63,0.31857067346572876
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,63,0.09869333108266194
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,63,0.15387200315793356
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,63,0.22392000754674277
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,63,0.16516266266504923
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,63,0.22679466009140015
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,63,0.23757332563400269
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,63,0.16190933187802634
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,63,0.2244373361269633
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,63,0.13962133725484213
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,63,0.21505065759023032
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,63,0.22451200087865195
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,63,0.15314132968584696
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,63,0.1636319955190023
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,63,0.21532267332077026
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,63,0.15634666879971823
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,63,0.2004586656888326
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,63,0.14547733465830484
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,63,0.23661333322525024
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,63,0.23084266980489096
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,63,0.15711999932924905
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,63,0.30248000224431354
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,63,0.1576533317565918
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,63,0.13864533106486002
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,63,0.17975467443466187
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,63,0.1562026639779409
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,63,0.15617066621780396
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,63,0.1739573280016581
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,63,0.2230293353398641
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,63,0.15429332852363586
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,63,0.22024534145991007
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,63,0.12001599868138631
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,63,0.15927466750144958
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,63,0.3282559911410014
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,63,0.416922648747762
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,63,0.24363199869791666
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,63,0.15989333391189575
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,63,0.18374399344126383
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,63,0.22026666005452475
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,63,0.13346667091051737
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,63,0.22095467646916708
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,63,0.15832533439000449
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,63,0.12905066212018332
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,63,0.22292800744374594
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,63,0.1588320036729177
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,63,0.2290666699409485
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,63,0.19125332434972128
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,63,0.15280000368754068
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,63,0.1971893310546875
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,63,0.2741173307100932
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,63,0.7722933292388916
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,63,0.13685333728790283
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,63,0.16153066356976828
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,63,0.2823999921480815
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,63,0.45465068022410077
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,63,0.16549866398175558
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,63,0.22369599342346191
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,63,0.15972800056139627
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,63,0.22242667277654013
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,63,0.1593653361002604
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,63,0.2264960010846456
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,63,0.14947199821472168
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,127,0.15517333149909973
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,63,0.16537599762280783
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,127,0.15676800409952799
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,63,0.23003200689951578
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,127,0.16379200418790182
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,127,0.2113386591275533
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,127,0.2083253264427185
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,127,0.21214399735132852
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,127,1.1521600087483723
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,63,0.7113386789957682
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,127,0.15611199537913004
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,127,0.15227199594179788
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,127,0.15147733688354492
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,127,0.20829866329828897
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,127,0.16724266608556113
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,127,0.20987200736999512
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,127,0.15990400314331055
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,127,0.2802559932072957
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,127,0.1548479994138082
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,127,0.16083733240763345
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,127,0.15574399630228677
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,127,0.21773332357406616
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,127,0.23331199089686075
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,127,0.1593706707159678
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,127,0.38628800710042316
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,127,0.2171306610107422
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,127,0.16638400157292685
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,127,0.22137600183486938
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,127,0.1569653352101644
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,127,0.15530666708946228
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,127,0.1556426684061686
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,127,0.22160534063975015
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,127,0.21979200839996338
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,127,0.14588800072669983
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,127,0.2049973408381144
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,127,0.15292267004648843
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,127,0.15492799878120422
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,127,0.2599946657816569
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,127,0.1581653356552124
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,127,0.21779199441274008
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,127,0.216154674688975
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,127,0.1978506644566854
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,127,0.20098666350046793
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,127,0.2219466765721639
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,127,0.22036266326904297
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,127,2.9715840021769204
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,127,0.15520532925923666
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,127,0.15243200461069742
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,127,0.20868800083796182
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,127,0.12081066767374675
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,127,0.2015519936879476
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,127,1.030842701594035
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,127,0.21640000740687051
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,127,0.15593600273132324
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,127,0.21917333205540976
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,127,0.18498667081197104
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,127,0.16145066420237222
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,127,0.15662399927775064
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,127,0.22154132525126138
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,127,0.22702399889628092
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,127,0.1569973329703013
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,127,0.15449066956837973
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,127,0.22738132874170938
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,127,1.044975996017456
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,127,0.22395199537277222
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,127,0.20904000600179037
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,127,0.15250666936238608
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,127,0.1213653286298116
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,127,0.4437013467152913
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,127,0.15969066818555197
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,127,0.2272640069325765
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,127,0.16547200083732605
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,127,0.1597493290901184
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,127,0.8012320200602213
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,127,0.23171732823053995
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,127,0.1609440048535665
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,127,0.218666672706604
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,127,0.16037866473197937
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,127,0.22270933787027994
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,127,0.15823466579119363
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,127,0.2116373380025228
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,127,0.15776532888412476
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,127,0.2058133284250895
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,127,0.15501333276430765
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,127,0.15527466932932535
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,127,0.22920533021291098
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,127,0.2244960069656372
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,127,0.15987733006477356
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,127,0.1546239952246348
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,127,0.2206666668256124
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,127,0.22786666949590048
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,127,0.16320533553759256
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,127,0.23365867137908936
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,127,0.15820266803105673
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,127,0.12139200170834859
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,127,0.2301973303159078
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,127,0.193066676457723
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,127,0.230512003103892
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,127,0.15678933262825012
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,127,0.12760532895723978
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,127,0.10748799641927083
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,127,0.15342400471369425
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,127,0.22006400426228842
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,127,0.22310932477315268
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,127,0.16422399878501892
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,127,0.22137600183486938
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,127,0.22541866699854532
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,127,0.15798399845759073
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,127,0.2159199913342794
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,127,0.1548373301823934
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,127,0.16082666317621866
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,127,0.21938133239746094
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,127,0.21132800976435342
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,127,0.22039467096328735
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,127,0.15210666259129843
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,127,0.10985599954922994
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,127,0.2176213264465332
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,127,0.1583466629187266
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,127,0.14654933412869772
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,127,0.16453867157300314
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,127,0.2247999906539917
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,127,0.1578879952430725
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,127,0.15625066558519998
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,127,0.2165706753730774
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,127,0.18080000082651773
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,127,0.22803199291229248
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,127,0.15808533628781637
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,127,0.12562666336695352
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,127,0.22522666056950888
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,127,0.1546026666959127
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,127,0.20179200172424316
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,127,0.1880906621615092
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,127,0.1567359964052836
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,127,0.23667732874552408
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,127,0.2152000069618225
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,127,0.36343999703725177
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,127,0.23252266645431519
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,127,0.16987200578053793
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,127,0.2221333384513855
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,127,0.12773332993189493
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,127,0.22307199239730835
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,127,0.15330666303634644
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,127,0.15849600235621134
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,127,0.16497600078582764
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,127,0.2203893264134725
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,127,0.1565546691417694
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,127,0.16124266386032104
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,127,0.2593119939168294
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,127,0.19513599077860513
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,127,0.16284799575805664
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,127,0.32119999329249066
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,127,0.43087999025980633
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,127,0.22341332832972208
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,127,0.11314666271209717
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,127,0.23456533749898276
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,127,0.16215999921162924
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,127,0.16313599546750387
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,127,0.2262399991353353
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,127,0.16293332974116007
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,127,0.35953601201375324
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,127,0.25777600208918255
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,127,0.1504639983177185
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,127,0.19610132773717245
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,127,0.8090826670328776
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,127,0.30505599578221637
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,127,0.18466667334238687
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,127,0.3054933349291484
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,127,0.1748853325843811
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,127,0.16648000478744507
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,127,0.47785600026448566
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,127,0.23199466864267984
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,127,0.13500266273816428
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,127,0.16659733653068542
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,127,0.23269865910212198
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,127,0.23137066761652628
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,255,0.09711999694506328
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,255,0.15985600153605142
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,255,0.1622880001862844
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,255,0.2094293236732483
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,127,0.16194666425387064
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,255,0.22013866901397705
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,255,0.1297706663608551
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,127,0.22228266795476279
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,127,0.16498133540153503
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,255,0.2189226746559143
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,255,0.1569493313630422
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,255,0.15160533785820007
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,255,0.2097813288370768
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,255,0.20713067054748535
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,127,0.2180746595064799
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,255,0.1577173372109731
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,255,0.1530400017897288
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,255,0.16410666704177856
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,255,0.21438932418823242
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,255,0.42851734161376953
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,255,0.18358933925628662
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,255,0.21688000361124674
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,255,0.16396799683570862
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,255,0.15758933623631796
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,255,0.2707466681798299
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,255,0.21969600518544516
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,255,0.17377066612243652
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,255,0.22188800573349
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,255,0.15892799695332846
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,255,0.15247467160224915
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,255,0.15902400016784668
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,255,0.21851199865341187
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,255,0.1551253298918406
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,255,0.2750506599744161
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,255,0.33215999603271484
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,255,0.20381865898768106
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,255,0.16356266538302103
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,255,0.15269333124160767
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,255,0.13571199774742126
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,255,0.1778986652692159
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,255,0.692138671875
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,255,0.2149333357810974
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,255,0.16451199849446616
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,255,0.21778132518132529
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,255,0.1532639960447947
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,255,0.22062933444976807
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,255,1.0902453263600667
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,255,0.2248693307240804
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,255,0.2145813306172689
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,255,0.8495999972025553
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,255,0.22044267257054648
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,255,0.20145066579182944
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,255,0.5504586696624756
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,255,0.2395039995511373
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,255,0.15385066469510397
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,255,0.1560533344745636
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,255,0.13320533434549967
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,255,0.22454933325449625
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,255,0.22034666935602823
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,255,0.17309866348902384
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,255,0.15314666430155435
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,255,0.15407466888427734
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,255,0.22611733277638754
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,255,0.22086934248606363
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,255,0.21778666973114014
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,255,0.15743999679883322
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,255,0.15896532932917276
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,255,0.19793067375818887
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,255,0.2302346626917521
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,255,0.15494400262832642
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,255,0.22105600436528525
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,255,0.18115200599034628
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,255,0.4059946537017822
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,255,0.16250666975975037
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,255,0.1616106629371643
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,255,0.21829867362976074
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,255,0.1548479994138082
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,255,0.22263999780019125
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,255,0.15597333510716757
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,255,0.25996265808741253
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,255,0.1744906703631083
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,255,0.18854933977127075
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,255,0.160671999057134
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,255,0.1984213391939799
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,255,0.16205333669980368
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,255,0.44154131412506104
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,255,0.22587200005849203
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,255,0.2296746571858724
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,255,0.1606880029042562
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,255,0.16224533319473267
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,255,0.228928009668986
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,255,0.21684267123540243
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,255,0.23335466782251993
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,255,0.1609653333822886
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,255,0.16346133748690286
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,255,0.15492266416549683
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,255,0.2291626731554667
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,255,0.16269866625467935
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,255,0.24023999770482382
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,255,0.21201066176096597
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,255,1.343173344930013
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,255,0.22006400426228842
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,255,0.22690133253733316
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,255,0.1328053375085195
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,255,0.16010133425394693
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,255,0.5350879828135172
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,255,0.23146667083104452
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,255,0.20645866791407266
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,255,0.14722667137781778
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,255,0.1844159960746765
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,255,0.15558933218320212
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,255,0.22735466559727988
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,255,0.16013866662979126
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,255,0.2311840057373047
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,255,0.15388799707094827
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,255,0.2749386628468831
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,255,0.15219733119010925
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,255,0.16314133008321127
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,255,0.16124266386032104
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,255,0.20949333906173706
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,255,0.20469866196314493
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,255,0.2176213264465332
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,255,0.16643200318018594
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,255,0.1564853290716807
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,255,0.1402506629625956
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,255,0.2255093256632487
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,255,0.15946132938067117
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,255,0.1591093341509501
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,255,0.2061013380686442
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,255,0.21716799338658652
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,255,0.20512000719706217
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,255,0.15643733739852905
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,255,0.1719520092010498
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,255,0.1637173295021057
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,255,0.2534986734390259
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,255,0.20646933714548746
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,255,0.2318399945894877
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,255,0.12993599971135458
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,255,0.218176007270813
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,255,0.17547200123469034
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,255,0.5959893465042114
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,255,0.12657599647839865
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,255,0.12635733683904013
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,255,0.2233546574910482
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,255,0.12947199741999307
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,255,0.11841600139935811
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,255,0.19672532876332602
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,255,0.2229493260383606
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,255,0.3506186803181966
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,255,0.17505600055058798
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,255,0.451530655225118
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,255,0.27517332633336383
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,255,0.21964800357818604
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,255,0.5906826655069987
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,255,0.13272000352541605
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,255,0.21335999170939127
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,255,0.16240533192952475
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,255,0.21795733769734701
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,255,0.22102399667104086
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,255,0.15983999768892923
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,255,0.590117335319519
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,255,0.20844266812006632
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,255,0.12520533800125122
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,255,0.2025173306465149
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,255,0.3464426596959432
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,255,0.21562665700912476
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,255,0.26502400636672974
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,255,0.1726026733716329
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,255,0.17013333241144815
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,255,0.3522080183029175
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,255,0.524890661239624
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,255,0.8504587014516195
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,255,0.16548267006874084
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,255,0.2343626618385315
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,255,0.24208533763885498
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,255,0.17406400044759116
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,255,0.14512532949447632
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,511,0.1590986649195353
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,255,0.6977813243865967
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,511,0.15169066190719604
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,511,0.2148853341738383
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,255,0.13597866892814636
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,511,0.1497813363869985
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,511,0.15899200240770975
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,511,0.21355199813842773
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,511,0.21565866470336914
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,511,0.1572426656881968
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,511,0.16035200158754984
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,511,0.15785066286722818
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,511,0.21063466866811117
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,511,0.2160053253173828
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,511,0.15899733702341715
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,511,0.19382399320602417
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,511,0.15903466939926147
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,511,0.9060160319010416
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,255,0.9429866472880045
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,511,0.3103626569112142
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,511,0.22522666056950888
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,511,0.23725332816441855
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,511,0.4206026792526245
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,511,0.15547733505566916
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,511,0.22834134101867676
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,511,0.2241386572519938
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,511,1.1014880339304607
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,511,0.16523733735084534
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,511,0.15786133209864298
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,511,0.2213333249092102
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,511,0.16022933522860208
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,511,0.21492799123128256
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,511,0.22143999735514322
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,511,0.19753066698710123
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,511,1.6130666732788086
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,511,0.22004266579945883
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,511,0.16082666317621866
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,511,0.1612320045630137
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,511,0.15492799878120422
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,511,0.2344800035158793
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,511,0.22247467438379923
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,511,1.1540319919586182
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,511,1.108848015467326
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,511,0.1551040013631185
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,511,0.17074666420618692
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,511,0.22359466552734375
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,511,0.22107734282811484
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,511,0.21901865800221762
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,511,0.15838399529457092
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,511,0.20376533269882202
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,511,0.1530239979426066
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,511,0.16012799739837646
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,511,0.16234133640925089
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,511,0.2558666666348775
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,511,0.22069867451985678
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,511,0.13059733311335245
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,511,0.17340266704559326
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,511,0.2201706568400065
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,511,0.15602667133013406
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,511,0.15550933281580606
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,511,0.2529386679331462
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,511,0.16089066863059998
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,511,0.16526933511098227
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,511,0.17398399114608765
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,511,0.154448002576828
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,511,0.21359467506408691
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,511,0.20362132787704468
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,511,0.160480002562205
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,511,0.18596800168355307
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,511,0.21620800097783408
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,511,0.16193067034085593
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,511,0.2209440072377523
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,511,0.231930673122406
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,511,0.16060266892115274
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,511,0.2034026583035787
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,511,0.12401066223780315
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,511,0.20884267489115396
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,511,0.16311466693878174
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,511,0.16858132680257162
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,511,0.22871466477711996
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,511,0.22427733739217123
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,511,0.2055306633313497
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,511,0.163290669520696
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,511,0.22425599892934164
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,511,0.15870933731396994
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,511,0.22037333250045776
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,511,0.2264159917831421
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,511,0.09539733330408733
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,511,0.16673066218694052
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,511,0.15402666727701822
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,511,0.15758933623631796
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,511,0.9962720076243082
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,511,0.15780267119407654
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,511,0.2201919953028361
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,511,0.3039199908574422
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,511,0.16036799550056458
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,511,0.22366400559743246
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,511,0.15693333745002747
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,511,0.21501866976420084
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,511,0.15871999661127725
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,511,0.15496533115704855
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,511,0.17704000075658163
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,511,0.17417067289352417
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,511,0.6387360095977783
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,511,0.15101866920789084
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,511,0.15946132938067117
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,511,0.1590720017751058
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,511,0.14517333110173544
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,511,0.1551466683546702
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,511,0.23033599058787027
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,511,0.16155200203259787
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,511,0.22178133328755698
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,511,0.524069348971049
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,511,0.20058133204778036
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,511,0.14409599701563516
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,511,0.12653332948684692
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,511,0.394538680712382
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,511,0.18184532721837363
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,511,0.12781332929929098
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,511,0.20358399550120035
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,511,0.1614400049050649
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,511,0.15308266878128052
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,511,0.22060267130533853
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,511,0.1641279955705007
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,511,0.21778132518132529
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,511,0.15612266461054483
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,511,0.22137600183486938
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,511,0.16215999921162924
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,511,0.22453866402308145
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,511,0.1567520002524058
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,511,0.2047040065129598
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,511,0.16134400169054666
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,511,0.19643733898798624
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,511,0.2192373275756836
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,511,0.2641333341598511
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,511,0.2299786607424418
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,511,0.1574026644229889
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,511,0.15808533628781637
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,511,0.2232960065205892
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,511,0.1581706702709198
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,511,0.22083733479181925
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,511,0.13058666388193765
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,511,0.21935999393463135
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,511,0.12828266620635986
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,511,0.21679999430974325
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,511,0.13153066237767538
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,511,0.20173867543538412
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,511,0.3959519863128662
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,511,0.24921600023905435
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,511,0.5270453294118246
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,511,0.1699626644452413
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,511,0.19941333929697672
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,511,0.49262932936350506
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,511,0.19965867201487222
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,511,0.3227039972941081
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,511,0.16702399651209512
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,511,0.1922666629155477
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,511,0.2188106576601664
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,511,0.14620799819628397
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,511,0.16556266943613687
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,511,0.16661866505940756
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,511,0.36830933888753253
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,511,0.2959786653518677
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,511,0.44727468490600586
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,511,0.28626133998235065
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,511,0.9505759874979655
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,511,0.6310826539993286
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,511,0.25149865945180255
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,511,0.46034133434295654
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,511,0.22747200727462769
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,511,0.3779360055923462
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,511,0.21859200795491537
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,1023,0.09675733248392741
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,511,0.4320640166600545
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,1023,0.12775466839472452
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,1023,0.1558613379796346
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,1023,0.2042506734530131
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,1023,0.15869866808255514
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,511,0.806933323542277
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,511,0.9750826358795166
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,1023,0.20765332380930582
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,511,0.33581864833831787
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,511,0.32765867312749225
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,1023,0.1530346671740214
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,1023,0.1586079994837443
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,1023,0.3386293252309163
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,1023,0.20773333311080933
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,511,0.20956265926361084
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,511,0.31146132946014404
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,1023,0.16127467155456543
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,1023,0.2132906715075175
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,1023,0.22104533513387045
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,1023,0.15537066260973612
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,1023,0.19749333461125693
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,1023,0.15890133380889893
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,1023,0.1662453313668569
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,1023,0.6450613339742025
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,1023,0.15987733006477356
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,1023,0.16006400187810263
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,1023,0.21925334135691324
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,1023,0.22839999198913574
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,1023,0.15598400433858237
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,1023,0.23274133602778116
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,1023,0.1612106661001841
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,1023,0.2195840080579122
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,1023,0.23643734057744345
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,1023,0.1606826682885488
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,1023,0.15449066956837973
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,1023,0.16056533654530844
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,1023,0.22124799092610678
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,1023,0.20004266500473022
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,1023,0.1553439994653066
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,1023,0.16089600324630737
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,1023,0.2227999965349833
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,1023,0.16128533085187277
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,1023,1.061088005701701
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,1023,0.15784000356992087
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,1023,0.2204479972521464
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,1023,1.00819198290507
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,1023,0.16145066420237222
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,1023,0.225055992603302
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,1023,0.15807466705640158
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,1023,0.1609813372294108
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,1023,0.22446399927139282
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,1023,0.2197386622428894
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,1023,0.15898133317629495
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,1023,1.158794641494751
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,1023,0.16056533654530844
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,1023,0.1273973286151886
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,1023,0.2879466613133748
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,1023,0.22825600703557333
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,1023,0.2327786684036255
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,1023,0.15318933129310608
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,1023,0.15878400206565857
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,1023,0.2258346676826477
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,1023,0.1575093368689219
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,1023,0.15894400080045065
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,1023,0.18291199207305908
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,1023,0.15677866339683533
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,1023,0.2302293380101522
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,1023,0.16222400466601053
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,1023,0.22545599937438965
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,1023,0.20017067591349283
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,1023,0.16668800512949625
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,1023,0.15341867009798685
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,1023,0.23758933941523233
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,1023,0.2229386568069458
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,1023,0.4028053283691406
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,1023,0.1651040017604828
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,1023,0.15705600380897522
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,1023,0.22360533475875854
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,1023,0.16525866587956747
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,1023,0.2286240061124166
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,1023,0.3009066581726074
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,1023,0.23783999681472778
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,1023,0.1579093337059021
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,1023,0.1758026679356893
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,1023,0.16495466232299805
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,1023,0.17916800578435263
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,1023,0.15808533628781637
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,1023,0.15799466768900552
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,1023,0.2316853404045105
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,1023,0.16874132553736368
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,1023,0.23169066508611044
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,1023,0.21984533468882242
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,1023,0.22095467646916708
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,1023,0.1222879985968272
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,1023,0.16210666298866272
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,1023,0.2179093360900879
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,1023,0.15428800384203592
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,1023,0.22378667195638022
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,1023,0.22531733910242716
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,1023,0.159770667552948
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,1023,0.1816320021947225
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,1023,0.20246400435765585
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,1023,0.1548640032609304
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,1023,0.15541866421699524
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,1023,0.44885865847269696
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,1023,0.23363200823465982
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,1023,0.15955733259518942
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,1023,0.1583039959271749
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,1023,0.23224000136057535
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,1023,0.19854400555292764
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,1023,0.2172106703122457
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,1023,0.1607093314329783
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,1023,0.1553813318411509
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,1023,0.2328959902127584
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,1023,0.15171200037002563
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,1023,0.16611733039220175
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,1023,0.24902933835983276
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,1023,0.20614933967590332
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,1023,0.22695465882619223
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,1023,0.15917332967122397
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,1023,0.15797332922617593
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,1023,0.26341332991917926
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,1023,0.17138665914535522
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,1023,0.23432532946268717
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,1023,0.17659199237823486
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,1023,0.22420267264048258
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,1023,0.15946666399637857
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,1023,0.22251200675964355
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,1023,0.12261866529782613
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,1023,0.20308266083399454
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,1023,0.22233066956202188
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,1023,0.166485329469045
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,1023,0.20710933208465576
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,1023,0.16164799531300864
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,1023,0.24508267641067505
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,1023,0.2985546588897705
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,1023,0.2999040087064107
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,1023,0.174234668413798
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,1023,1.1050346692403157
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,1023,0.26657066742579144
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,1023,0.1663146714369456
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,1023,0.22830933332443237
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,1023,0.12362133463223775
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,1023,0.22220800320307413
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,1023,0.15692800283432007
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,1023,0.22875199715296426
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,1023,0.16938134034474692
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,1023,0.22975466648737589
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,1023,0.21194666624069214
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,1023,0.12185066938400269
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,1023,0.4957386652628581
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,1023,0.32502933343251544
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,1023,0.268069326877594
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,1023,0.57697065671285
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,1023,0.4033653338750203
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,1023,0.24368532498677573
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,1023,0.32605334122975665
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,1023,0.2900480031967163
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,1023,0.2779253323872884
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,1023,0.2712373336156209
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,1023,0.21805334091186523
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,1023,0.21309866507848105
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,1023,0.227183997631073
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,1023,0.26498132944107056
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,1023,0.22936532894770303
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,1023,0.25068267186482746
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,1023,0.6540480057398478
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,1023,1.1410186290740967
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,1023,0.6487040122350057
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,1023,0.4317973454793294
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,1023,0.3887733221054077
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,1023,0.8201813697814941
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,1023,0.36686933040618896
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,1023,0.5708426634470621
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,1023,0.35608001550038654
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,1023,0.5398133198420206
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,2047,0.15042666594187418
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,1023,0.3497333526611328
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,2047,0.22658133506774902
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,1023,1.2368480364481609
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,1023,0.3482133150100708
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,2047,0.1583093305428823
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,1023,1.2793760299682617
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,1023,0.3450666666030884
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,2047,0.31708266337712604
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,1023,0.5276853243509928
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,2047,0.16148799657821655
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,2047,0.21962666511535645
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,2047,0.15373333295186362
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,2047,0.29466134309768677
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,2047,0.15774933497111002
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,2047,0.2317919929822286
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,2047,0.21663467089335123
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,2047,0.21403199434280396
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,2047,0.13266133268674216
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,2047,0.212336003780365
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,2047,0.17266666889190674
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,2047,0.21625065803527832
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,2047,0.349616010983785
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,2047,0.23030400276184082
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,2047,0.2248106598854065
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,2047,0.17668267091115317
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,2047,0.14069867134094238
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,2047,0.22936000426610312
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,2047,0.1601599951585134
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,2047,0.23730667432149252
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,2047,0.15758933623631796
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,2047,0.7320906321207682
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,2047,0.15800533692042032
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,2047,0.25749866167704266
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,2047,0.1618133286635081
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,2047,0.22784000635147095
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,2047,0.2106293241182963
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,2047,0.16380266348520914
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,2047,0.1632266640663147
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,2047,0.23132266600926718
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,2047,0.15777599811553955
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,2047,1.103600025177002
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,2047,0.2309760053952535
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,2047,0.17117865880330405
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,2047,0.22457599639892578
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,2047,0.3229386607805888
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,2047,0.15647466977437338
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,2047,0.2286293307940165
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,2047,0.15877866744995117
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,2047,0.23020267486572266
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,2047,0.12877333164215088
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,2047,0.2276960015296936
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,2047,0.20081067085266113
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,2047,0.15557866295178732
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,2047,0.15105066696802774
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,2047,0.13702399532000223
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,2047,0.22293333212534586
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,2047,0.16673066218694052
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,2047,0.22792534033457437
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,2047,0.2287893295288086
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,2047,0.17698667446772257
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,2047,0.22592532634735107
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,2047,0.21849600474039713
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,2047,0.16288533806800842
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,2047,0.1714400053024292
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,2047,0.15717867016792297
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,2047,0.18283732732137045
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,2047,0.16083199779192606
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,2047,0.23229332764943442
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,2047,0.20804266134897867
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,2047,0.15570132931073508
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,2047,0.19173866510391235
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,2047,0.16357866923014322
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,2047,0.15957333644231161
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,2047,0.2288480003674825
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,2047,0.22778666019439697
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,2047,0.21780800819396973
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,2047,1.0226826667785645
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,2047,0.16218666235605875
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,2047,0.23215999205907187
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,2047,0.1304800013701121
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,2047,0.16408000389734903
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,2047,1.7060267130533855
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,2047,0.2360853354136149
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,2047,0.2159093419710795
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,2047,0.15982932845751444
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,2047,0.1611840029557546
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,2047,0.1616426706314087
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,2047,0.2223200003306071
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,2047,0.155648003021876
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,2047,0.22420267264048258
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,2047,0.11340799927711487
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,2047,0.20197866360346475
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,2047,0.16717867056528726
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,2047,0.24157865842183432
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,2047,0.21418132384618124
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,2047,0.1586666703224182
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,2047,0.22403200467427573
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,2047,0.1588533322016398
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,2047,0.23060800631841025
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,2047,0.15744533141454062
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,2047,0.15678399801254272
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,2047,0.22642133633295694
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,2047,0.15424000223477682
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,2047,0.22717867294947305
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,2047,0.1323093374570211
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,2047,0.5340373516082764
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,2047,0.19452265898386636
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,2047,0.15596800049146017
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,2047,0.16160533825556436
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,2047,0.23136534293492636
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,2047,0.22716800371805826
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,2047,0.1318666636943817
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,2047,0.22550400098164877
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,2047,0.1264639993508657
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,2047,0.15572266777356467
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,2047,0.22949866453806558
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,2047,0.21342400709788004
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,2047,0.1874986688296
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,2047,0.16577600439389548
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,2047,0.16878400246302286
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,2047,0.23244800170262656
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,2047,0.20777066548665366
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,2047,0.22511466344197592
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,2047,0.16171200076738992
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,2047,0.15999466180801392
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,2047,0.2290133237838745
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,2047,0.2220159967740377
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,2047,0.1634666621685028
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,2047,0.23915733893712363
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,2047,0.21979733308156332
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,2047,0.1592693328857422
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,2047,0.16164799531300864
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,2047,0.21043733755747476
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,2047,0.3439573446909587
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,2047,0.3731946547826131
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,2047,0.23578667640686035
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,2047,0.2881173292795817
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,2047,0.21434666713078818
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,2047,0.2558773358662923
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,2047,0.24415467182795206
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,2047,0.2034613291422526
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,2047,0.19430400927861533
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,2047,0.24994667371114096
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,2047,0.19578667481740317
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,2047,0.8940160274505615
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,2047,0.19350399573644003
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,2047,0.23549866676330566
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,2047,0.18896534045537314
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,2047,0.20892800887425741
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,2047,0.4539253314336141
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,2047,0.7056053479512533
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,2047,0.6335946718851725
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,2047,0.4017333189646403
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,2047,0.7901866436004639
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,2047,0.5454453229904175
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,2047,0.3780746857325236
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,2047,0.5085013310114542
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,2047,0.35809067885080975
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,2047,0.3486986557642619
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,2047,0.5044693152109782
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,2047,0.4899786710739136
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,2047,0.344101349512736
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,2047,0.49061334133148193
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,2047,0.3442080020904541
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,2047,0.4744693438212077
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,2047,1.0445120334625244
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,2047,1.5312639872233074
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,2047,0.6971999804178873
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,2047,1.2110506693522136
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,2047,1.032538652420044
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,2047,0.665669322013855
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,2047,0.6367306709289551
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,2047,0.9552960395812988
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,2047,0.6259040037790934
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,2047,0.931552012761434
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,4095,0.12229866782824199
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,2047,0.6141653458277384
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,2047,0.9194986820220947
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,4095,0.16244799892107645
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,4095,0.20914665857950845
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,2047,0.6142666737238566
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,4095,0.2144533395767212
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,4095,0.14600533246994019
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,4095,0.1616426706314087
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,2047,0.912821372350057
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,4095,0.15743999679883322
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,4095,1.1731839974721272
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,2047,0.6116160154342651
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,4095,0.21437333027521768
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,4095,0.2392639915148417
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,4095,0.157231996456782
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,4095,1.1578186353047688
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,4095,0.1716853380203247
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,4095,0.2217493255933126
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,4095,0.15829333662986755
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,4095,0.20409599939982095
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,4095,1.166874647140503
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,4095,2.394181410471598
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,4095,0.168938676516215
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,4095,0.22616533438364664
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,4095,0.16362667083740234
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,4095,0.22236265738805136
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,4095,0.15923200050989786
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,4095,0.2388533353805542
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,4095,0.1575093368689219
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,4095,2.6072640419006348
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,4095,0.16087466478347778
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,4095,0.22725866238276163
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,4095,0.1586026648680369
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,4095,0.22632533311843872
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,4095,0.16140799721082053
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,4095,1.1397919654846191
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,4095,0.16570666432380676
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,4095,0.22919466098149618
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,4095,0.15268266201019287
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,4095,0.170250674088796
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,4095,0.25486934185028076
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,4095,0.17139200369517008
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,4095,0.23317867517471313
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,4095,0.22463999191919962
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,4095,0.1686026652654012
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,4095,0.23532267411549887
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,4095,0.3939359982808431
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,4095,1.0468160311381023
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,4095,0.26343466838200885
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,4095,0.16410666704177856
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,4095,0.15985066692034403
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,4095,0.19929067293802896
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,4095,0.240447998046875
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,4095,0.23658132553100586
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,4095,0.16302399833997092
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,4095,0.14711466431617737
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,4095,0.16541866461435953
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,4095,0.16961065928141275
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,4095,0.16732800006866455
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,4095,0.15636799732844034
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,4095,0.2403093377749125
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,4095,0.23704532782236734
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,4095,0.2465333342552185
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,4095,0.15452800194422403
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,4095,0.16126400232315063
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,4095,0.1351146697998047
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,4095,0.23452266057332358
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,4095,0.22666666905085245
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,4095,0.15706666310628256
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,4095,0.16246400276819864
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,4095,0.23389333486557007
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,4095,0.16224533319473267
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,4095,0.24196799596150717
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,4095,0.22769065697987875
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,2047,0.8900000254313151
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,4095,0.7157973448435465
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,4095,0.15169599652290344
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,4095,0.16218666235605875
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,4095,0.22438400983810425
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,4095,0.2274506688117981
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,4095,0.17839999993642172
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,4095,0.22219200929005942
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,4095,0.17061867316563925
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,4095,0.16286399960517883
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,4095,0.2608960072199504
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,4095,0.18304532766342163
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,4095,0.1672053337097168
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,4095,0.16607466340065002
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,4095,0.20708266894022623
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,4095,0.16512533028920492
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,4095,0.22573866446812949
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,4095,0.1634666621685028
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,4095,0.2271626591682434
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,4095,0.16263467073440552
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,4095,0.16416000326474509
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,4095,0.34942400455474854
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,4095,0.16176533699035645
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,4095,0.3893973429997762
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,4095,0.20524267355600992
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,4095,0.16950400670369467
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,4095,0.19554134209950766
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,4095,0.168287992477417
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,4095,0.23321066300074259
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,4095,0.17814399798711142
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,4095,0.23055466016133627
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,4095,0.23220799366633096
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,4095,0.17163199186325073
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,4095,0.16978132724761963
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,4095,0.1725813349088033
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,4095,0.1738613247871399
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,4095,0.22478399674097696
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,4095,0.17378133535385132
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,4095,0.23281600077946982
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,4095,0.16964266697565714
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,4095,0.2280906637509664
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,4095,0.1639840006828308
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,4095,0.15108799934387207
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,4095,0.3050453265508016
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,4095,0.239519993464152
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,4095,0.20240533351898193
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,4095,0.228928009668986
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,4095,0.1937546730041504
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,4095,0.6297973394393921
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,4095,0.18947199980417886
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,4095,0.19107200702031454
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,4095,0.23463465770085654
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,4095,0.18664532899856567
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,4095,0.24267733097076416
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,4095,0.2358186642328898
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,4095,0.18175466855367026
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,4095,0.23226133982340494
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,4095,0.1855199933052063
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,4095,0.21100266774495444
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,4095,0.5557760000228882
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,4095,1.5001707077026367
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,4095,0.3703840176264445
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,4095,0.33957334359486896
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,4095,0.6247573296229044
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,4095,0.5052266518274943
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,4095,0.3494186798731486
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,4095,0.4854559898376465
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,4095,0.3380959828694661
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,4095,0.4843626817067464
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,4095,0.33027732372283936
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,4095,0.32860267162323
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,4095,0.47561601797739667
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,4095,0.3267306685447693
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,4095,1.1770079930623372
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,4095,0.4599733352661133
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,4095,1.099296013514201
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,4095,0.9121119976043701
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,4095,1.018064022064209
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,4095,1.1894986629486084
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,4095,0.6441440184911092
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,4095,0.67030930519104
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,4095,0.7243839899698893
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,4095,0.6291360060373942
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,4095,0.9297760327657064
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,4095,0.9218186537424723
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,4095,0.8900159994761149
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,4095,0.9095359643300375
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,4095,0.6161066691080729
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,4095,0.8789013226826986
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,4095,0.6177759965260824
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,4095,0.8625813325246176
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,4095,2.31057071685791
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,4095,1.9958186149597168
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,4095,1.8275893529256184
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,4095,1.2361866633097331
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,4095,1.2152693271636963
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,4095,1.817893346150716
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,4095,1.1733653545379639
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,4095,1.7276746431986492
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,4095,1.1587999661763508
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,4095,1.7160693804423015
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,8191,0.15846400459607443
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,8191,0.21663999557495117
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,4095,1.152021328608195
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,8191,0.16548800468444824
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,8191,0.21662932634353638
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,8191,0.17076265811920166
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,8191,0.15922133127848306
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,8191,0.16354667147000632
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,8191,0.2222986618677775
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,8191,0.160480002562205
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,8191,2.100698630015055
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,8191,0.2141866683959961
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,8191,0.11858666936556499
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,8191,0.16220800081888834
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,8191,0.15938133001327515
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,8191,0.21202667554219565
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,8191,0.2123146653175354
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,8191,0.16169599692026773
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,8191,0.2392639915148417
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,8191,0.1588320036729177
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,8191,0.16896533966064453
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,8191,0.229802668094635
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,8191,0.21925334135691324
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,8191,0.17944000164667764
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,8191,0.2299519975980123
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,8191,0.16033066312472025
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,8191,0.16036267081896463
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,8191,0.2348639965057373
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,8191,0.23283199469248453
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,4095,4.33242130279541
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,8191,0.1590933303038279
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,8191,0.2344213326772054
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,8191,0.17002665996551514
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,8191,0.2059146761894226
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,8191,0.1599146624406179
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,8191,0.15780799587567648
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,8191,0.3071253299713135
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,8191,0.28598399957021076
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,8191,0.164000004529953
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,8191,0.16008533040682474
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,8191,0.2695733308792114
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,8191,0.27582399050394696
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,8191,0.1660533348719279
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,8191,0.2815413276354472
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,8191,0.15545599659283957
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,8191,0.2725493311882019
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,8191,0.23698665698369345
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,8191,0.15409599741299948
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,4095,1.6978185971577961
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,8191,0.2736053268114726
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,8191,0.2619306643803914
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,4095,1.1506880124409993
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,4095,1.147210677464803
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,8191,0.1711519956588745
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,8191,0.16516799728075662
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,8191,0.29661333560943604
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,8191,0.36075735092163086
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,8191,0.2879839936892192
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,4095,1.6894346872965496
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,8191,0.15449066956837973
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,8191,0.2767039934794108
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,8191,0.1679733395576477
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,8191,0.16129066546758017
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,8191,0.7766400178273519
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,8191,0.2754559914271037
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,8191,0.15929599603017172
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,8191,0.16120533148447672
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,8191,0.27291200558344525
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,8191,0.1549493372440338
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,8191,0.26630399624506634
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,8191,0.16083199779192606
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,8191,0.2275200088818868
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,8191,0.1657813290754954
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,8191,0.16965866088867188
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,8191,0.2255786657333374
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,8191,0.23341333866119385
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,8191,0.16405866543451944
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,8191,0.22844266891479492
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,8191,0.15988799929618835
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,8191,0.227674663066864
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,8191,0.1267146666844686
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,8191,0.17924267053604126
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,8191,0.1269866625467936
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,8191,0.18412800629933676
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,8191,1.030453364054362
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,8191,0.21276267369588217
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,8191,0.17550400892893472
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,8191,0.23364800214767456
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,8191,0.17562667528788248
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,8191,0.22516266504923502
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,8191,0.21973333756128946
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,8191,0.1751733422279358
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,8191,0.22847465674082437
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,8191,0.16982932885487875
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,8191,0.1707680026690165
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,8191,0.22573333978652954
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,8191,0.23120532433191934
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,8191,0.16859199603398642
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,8191,0.16590399543444315
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,8191,0.17247466246287027
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,8191,0.26993600527445477
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,8191,0.19912532965342203
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,8191,0.26659733057022095
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,8191,0.2792479991912842
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,8191,0.2683200041453044
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,8191,0.17915733655293783
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,8191,0.2630293369293213
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,8191,0.17542932430903116
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,8191,0.25998934110005695
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,8191,0.21944000323613486
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,8191,0.2548533280690511
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,8191,0.202890674273173
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,8191,0.25597866376241046
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,8191,0.23210134108861288
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,8191,0.29803733030955
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,8191,0.257914662361145
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,8191,0.2029973268508911
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,8191,0.17550933361053467
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,8191,0.5849759976069132
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,8191,0.5375893513361613
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,8191,0.4256693522135417
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,8191,0.5044533411661783
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,8191,0.47973867257436115
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,8191,0.33220799763997394
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,8191,0.3253759940465291
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,8191,0.46857066949208576
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,8191,0.3293013374010722
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,8191,0.47041598955790204
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,8191,0.3282080094019572
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,8191,0.4660960038503011
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,8191,0.32178133726119995
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,8191,0.46509865919748944
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,8191,0.32400532563527423
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,8191,0.451909343401591
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,8191,1.0011626879374187
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,8191,0.9558826287587484
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,8191,0.6421013275782267
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,8191,0.8937333424886068
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,8191,0.6203306516011556
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,8191,0.9405653476715088
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,8191,0.6072106758753458
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,8191,0.9020373026529948
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,8191,0.6037973165512085
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,8191,0.8808746337890625
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,8191,0.5976266860961914
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,8191,0.5997653404871622
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,8191,3.207461357116699
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,8191,0.5985813140869141
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,8191,1.7171947161356609
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,8191,1.0376746654510498
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,8191,2.9048213958740234
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,8191,1.903765360514323
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,8191,1.2591573397318523
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,8191,1.7965173721313477
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,8191,1.2411093711853027
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,8191,1.7025173505147297
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,8191,1.664453347524007
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,8191,1.1782399813334148
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,8191,1.1719093322753906
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,8191,1.1893493334452312
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,8191,1.6430986722310383
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,8191,1.6584266026814778
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,8191,1.1487092971801758
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,8191,1.647333304087321
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,8191,1.6620853741963704
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,8191,1.16046937306722
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,8191,2.340442657470703
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,8191,3.5411465962727866
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,8191,3.4151252110799155
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,8191,2.2613706588745117
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,8191,3.819957415262858
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,8191,3.3480745951334634
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,8191,3.3223625818888345
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,8191,2.2452799479166665
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,8191,3.3178879419962564
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,16383,0.1650826632976532
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,16383,0.22851200898488364
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,16383,2.0346506436665854
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,16383,0.21689067284266153
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,16383,0.17298666636149088
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,16383,0.21840532620747885
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,8191,2.264906724294027
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,16383,0.21658132473627725
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,16383,0.17484267552693686
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,16383,0.8085599740346273
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,16383,0.2232960065205892
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,16383,0.1586186687151591
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,16383,0.1588479975859324
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,16383,0.2253226637840271
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,16383,0.2132426699002584
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,8191,2.2553812662760415
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,16383,0.12627200285593668
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,16383,1.288149356842041
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,16383,0.16547200083732605
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,16383,0.11270933349927266
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,16383,0.1780746579170227
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,16383,0.1782240072886149
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,16383,0.15971733132998148
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,16383,0.22437334060668945
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,16383,0.16474133729934692
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,8191,2.22051731745402
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,16383,0.22057066361109415
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,8191,3.2062079111735025
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,16383,0.15866133570671082
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,8191,3.2011839548746743
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,16383,0.22542933622996011
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,16383,0.20376000801722208
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,8191,2.2173120180765786
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,16383,0.16210132837295532
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,16383,0.2316746711730957
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,16383,0.1998080015182495
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,16383,0.17202132940292358
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,16383,0.2314186692237854
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,8191,3.285423914591471
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,16383,0.16247999668121338
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,16383,0.22738667329152426
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,16383,0.24814399083455405
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,16383,0.15421332915623984
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,16383,0.2383306622505188
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,16383,0.13963199655214945
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,16383,0.1718026598294576
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,16383,0.23106666405995688
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,16383,0.16048533717791238
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,16383,0.23132266600926718
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,16383,0.15158933401107788
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,16383,0.1743519902229309
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,16383,0.22917866706848145
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,16383,0.23241066932678223
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,16383,0.30355199178059894
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,16383,0.16506666938463846
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,16383,0.1731839974721273
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,16383,0.19236799081166586
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,16383,0.18563199043273926
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,16383,0.22458134094874063
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,16383,1.6197813351949055
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,16383,0.16491732994715372
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,16383,0.23794132471084595
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,16383,0.16061333815256754
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,16383,1.6854292551676433
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,16383,0.23381867011388144
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,16383,0.12833600242932638
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,16383,0.1588746706644694
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,16383,0.23014932870864868
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,16383,0.2346880038579305
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,16383,0.21193599700927734
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,16383,0.8966453075408936
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,16383,0.1706399917602539
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,16383,0.22750933965047201
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,16383,0.23145600159962973
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,16383,0.18022932608922324
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,16383,0.17404800653457642
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,16383,0.1676959991455078
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,16383,0.23684799671173096
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,16383,0.18440000216166177
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,16383,0.17220799128214517
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,16383,0.17585599422454834
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,16383,0.23130667209625244
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,16383,0.2326080004374186
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,16383,0.16605866948763529
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,16383,0.23445866505304971
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,16383,0.20110400517781576
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,16383,0.1763146718343099
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,16383,0.28383467594782513
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,16383,0.2662453254063924
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,16383,0.9053226312001547
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,16383,0.2621600031852722
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,16383,0.48255467414855957
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,16383,0.47284801801045734
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,16383,0.26843200127283734
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,16383,0.475930651028951
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,16383,0.25653332471847534
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,16383,0.2593013246854146
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,16383,0.4620853265126546
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,16383,0.5135680039723715
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,16383,0.26021866003672284
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,16383,0.462661345799764
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,16383,0.46836801369984943
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,16383,0.2587733268737793
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,16383,0.4740053415298462
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,16383,0.4850399891535441
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,16383,0.4728960196177165
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,16383,0.5134506622950236
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,16383,0.4699840148289998
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,16383,0.46810134251912433
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,16383,0.4671200116475423
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,16383,0.4845920006434123
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,16383,0.4856853485107422
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,16383,0.45732800165812176
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,16383,0.6857706705729166
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,16383,0.4651679992675781
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,16383,0.4848693211873372
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,16383,0.4655253489812215
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,16383,0.48507734139760333
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,16383,1.209007978439331
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,16383,0.946997324625651
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,16383,0.6256906588872274
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,16383,0.9172800381978353
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,16383,0.6340800126393636
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,16383,0.8903360366821289
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,16383,0.9441119829813639
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,16383,0.6034559806187948
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,16383,0.8610986868540446
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,16383,0.6015199820200602
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,16383,0.603002667427063
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,16383,0.5960640112559
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,16383,0.8621493180592855
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,16383,0.8553386529286703
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,16383,0.8601973056793213
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,16383,2.3445493380228677
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,16383,0.8357386589050293
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,16383,1.1731306711832683
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,16383,1.15556796391805
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,16383,1.7810079256693523
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,16383,1.7081440289815266
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,16383,1.7706507047017415
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,16383,1.6744640668233235
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,16383,1.1486559708913167
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,16383,3.2937866846720376
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,16383,1.1387786865234375
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,16383,1.6492853164672852
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,16383,1.6357119878133137
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,16383,1.1354026794433594
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,16383,1.1388533115386963
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,16383,1.6405386924743652
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,16383,1.1346506277720134
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,16383,1.618058681488037
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,16383,3.4752321243286133
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,16383,3.5047359466552734
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,16383,3.3615360260009766
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,16383,2.3482613563537598
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,16383,2.2772372563680015
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,16383,3.2880640029907227
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,16383,2.2884532610575357
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,16383,3.252928098042806
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,16383,2.262762705485026
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,16383,3.268656094868978
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,16383,2.228031953175863
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,16383,2.224602699279785
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,16383,3.210026741027832
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,16383,3.2016372680664062
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,16383,2.222661336263021
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,16383,3.2145814895629883
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,16383,6.600266774495442
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,16383,6.943125406901042
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,16383,6.58457628885905
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,16383,4.4683411916097
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,16383,4.486501375834147
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,16383,6.420426686604817
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,16383,4.375253359476726
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,16383,6.336687723795573
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,16383,4.390624046325684
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,32767,0.1033066709836324
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,16383,6.367461522420247
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,32767,0.13767466942469278
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,32767,0.1606559952100118
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,32767,0.2169813315073649
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,32767,0.15940800309181213
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,16383,4.372453371683757
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,16383,6.354037602742513
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,32767,0.1625653306643168
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,16383,4.431344032287598
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,32767,0.22282665967941284
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,32767,0.21829867362976074
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,16383,6.314597447713216
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,32767,0.21539199352264404
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,32767,0.15618133544921875
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,32767,0.21647467215855917
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,32767,0.20188266038894653
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,32767,0.16005866726239523
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,32767,0.21836266915003458
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,32767,0.20025600989659628
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,32767,0.15813333789507547
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,32767,0.17378133535385132
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,32767,0.2569066683451335
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,32767,0.1646666626135508
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,32767,4.495557467142741
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,32767,0.23050665855407715
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,32767,0.24860799312591553
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,32767,0.1710559924443563
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,32767,0.1605226695537567
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,32767,0.22875199715296426
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,32767,0.22753600279490152
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,32767,0.1762453317642212
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,32767,0.22924266258875528
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,32767,0.4950079917907715
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,32767,1.253434658050537
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,32767,0.20230400562286377
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,32767,0.15505599975585938
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,32767,0.1700800061225891
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,32767,0.17387733856836954
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,32767,0.23465067148208618
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,32767,1.4825654029846191
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,32767,0.16061333815256754
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,32767,0.22710400819778442
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,32767,0.18519467115402222
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,32767,0.2196213404337565
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,32767,0.2329919934272766
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,32767,0.1646346648534139
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,32767,0.16806934277216592
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,32767,0.15971733132998148
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,32767,0.2374346653620402
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,32767,0.23019200563430786
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,32767,0.15993600090344748
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,32767,0.20567999283472696
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,32767,0.1751520037651062
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,32767,0.9617066383361816
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,32767,0.16201066970825195
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,32767,0.9457546869913737
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,32767,0.1707306702931722
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,32767,0.1778293251991272
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,32767,0.932032028834025
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,32767,0.22364266713460287
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,32767,0.9470187028249105
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,32767,0.9347360134124756
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,16383,6.279413223266602
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,32767,0.2396106719970703
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,32767,0.19578667481740317
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,32767,0.9272639751434326
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,16383,4.365488052368164
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,32767,0.16674133141835532
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,32767,0.9264586766560873
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,32767,0.9216799736022949
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,32767,1.015397310256958
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,32767,0.26601600646972656
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,32767,0.950767993927002
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,32767,0.2623093326886495
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,32767,0.9529813130696615
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,32767,0.2746933301289876
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,32767,0.2597386638323466
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,32767,1.1006186803181965
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,32767,0.2568639914194743
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,32767,0.9297386805216471
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,32767,0.2755413254102071
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,32767,0.9254613717397054
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,32767,0.26079465945561725
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,32767,0.2540480097134908
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,32767,0.975056012471517
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,32767,0.47569068272908527
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,32767,0.9509600003560384
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,32767,0.4906826814015706
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,32767,0.8900533517201742
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,32767,0.4700533151626587
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,32767,0.8979520003000895
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,32767,0.8769173622131348
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,32767,0.46860265731811523
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,32767,0.8645973205566406
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,32767,0.47098668416341144
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,32767,0.8633333047231039
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,32767,1.1695306301116943
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,32767,0.47058133284250897
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,32767,0.8836800257364908
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,32767,1.8339893023173015
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,32767,0.4654080073038737
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,32767,0.854581356048584
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,32767,0.8822879791259766
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,32767,0.9116373062133789
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,32767,0.8949600060780843
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,32767,0.8834773699442545
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,32767,0.8983413378397623
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,32767,0.9172000090281168
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,32767,1.5836373964945476
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,32767,0.8773866494496664
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,32767,0.850879987080892
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,32767,3.3820266723632812
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,32767,0.8988640308380127
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,32767,0.8436586856842041
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,32767,0.9969653288523356
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,32767,0.8525280157725016
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,32767,0.8897759914398193
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,32767,0.8363146781921387
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,32767,2.2200533548990884
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,32767,1.7332107226053874
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,32767,1.1556800206502278
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,32767,1.15066663424174
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,32767,1.666709264119466
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,32767,1.6567893028259277
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,32767,1.149674654006958
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,32767,1.6572480201721191
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,32767,1.6394666035970051
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,32767,1.1418826580047607
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,32767,1.1498986879984539
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,32767,1.635994593302409
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,32767,1.6388319333394368
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,32767,1.6117332776387532
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,32767,1.1567520300547283
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,32767,1.9945386250813801
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,32767,3.3614346186319985
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,32767,3.199413299560547
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,32767,2.2881813049316406
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,32767,3.3089599609375
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,32767,2.234330654144287
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,32767,3.261631965637207
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,32767,2.224613348642985
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,32767,4.18343989054362
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,32767,3.1914774576822915
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,32767,2.2351039250691733
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,32767,3.168976147969564
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,32767,2.2112000783284507
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,32767,2.2075254122416177
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,32767,2.2110506693522134
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,32767,3.190773328145345
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,32767,3.21232541402181
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,32767,6.608640034993489
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,32767,6.685706456502278
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,32767,4.482144037882487
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,32767,6.466592152913411
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,32767,4.454965273539226
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,32767,6.369344075520833
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,32767,4.431040128072103
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,32767,6.323567708333333
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,32767,4.612586657206218
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,32767,6.270463943481445
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,32767,4.381487846374512
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,65535,0.16847999890645346
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,65535,0.21280533075332642
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,65535,0.165610671043396
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,65535,0.2132800022761027
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,65535,0.1655893325805664
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,65535,0.6578666766484579
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,65535,0.15677332878112793
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,65535,0.21412799755732217
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,65535,0.16520532965660095
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,65535,0.2135466734568278
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,65535,0.16148266196250916
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,65535,0.1588053305943807
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,65535,0.21701333920160928
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,65535,0.1665493349234263
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,65535,0.25031999746958417
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,65535,0.20974934101104736
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,65535,0.16352533300717673
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,65535,0.16841065883636475
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,65535,0.2205280065536499
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,65535,1.5258827209472656
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,65535,0.1653279960155487
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,65535,0.22642133633295694
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,65535,0.23102933168411255
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,65535,0.16723199685414633
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,65535,0.16244799892107645
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,32767,6.326335906982422
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,65535,0.40991465250651044
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,65535,0.22791999578475952
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,65535,0.23164800802866617
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,65535,0.16791999340057373
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,65535,0.1600106656551361
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,65535,0.1588159998257955
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,65535,0.20368534326553345
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,32767,4.40012804667155
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,65535,0.17866132656733194
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,65535,0.1702186663945516
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,65535,1.7903413772583008
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,65535,1.859125296274821
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,65535,0.17520533005396524
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,65535,1.8126026789347331
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,65535,0.17409066359202066
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,65535,1.7915627161661785
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,65535,0.16698666413625082
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,65535,0.181002676486969
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,65535,1.7772266070048015
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,65535,0.17235199610392252
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,65535,1.7870880762736003
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,65535,1.76911465326945
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,65535,0.17070933183034262
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,65535,1.7691094080607097
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,65535,0.27699732780456543
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,65535,0.2621493339538574
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,65535,1.807125409444173
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,32767,6.34391975402832
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,65535,1.8064266840616863
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,65535,0.2680160005887349
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,65535,0.25832533836364746
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,65535,1.7868159612019856
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,32767,4.3936106363932295
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,65535,1.7726346651713054
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,32767,6.321829477945964
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,65535,0.25763734181722003
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,65535,0.26731733481089276
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,65535,1.7932319641113281
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,65535,0.26814399162928265
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,65535,1.7783093452453613
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,65535,0.2566506663958232
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,65535,1.7781012852986653
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,65535,1.8140800793965657
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,65535,0.4849119981129964
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,65535,0.47548266251881915
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,65535,1.8057600657145183
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,65535,1.8097119331359863
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,65535,0.46862932046254474
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,65535,1.7833546002705891
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,65535,0.4671359856923421
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,65535,0.4670399824778239
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,65535,1.7782773971557617
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,65535,0.48315731684366864
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,65535,1.7868000666300456
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,65535,0.4673546552658081
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,65535,1.769360065460205
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,65535,0.46767465273539227
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,65535,1.7660533587137859
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,65535,1.7529120445251465
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,65535,0.9269493420918783
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,65535,0.9011572996775309
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,65535,1.6611040433247883
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,65535,0.8917013009389242
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,65535,1.693552017211914
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,65535,1.6387732823689778
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,65535,0.9184160232543945
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,65535,1.6375412940979004
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,65535,0.8913599650065104
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,65535,1.6487092971801758
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,65535,1.6317119598388672
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,65535,1.6481173833211262
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,65535,0.8903840382893881
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,65535,0.8878400325775146
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,65535,0.9234933058420817
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,65535,1.6561546325683594
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,65535,1.7567200660705566
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,65535,1.7734400431315105
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,65535,2.277189254760742
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,65535,1.722431977589925
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,65535,1.6686347325642903
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,65535,1.7115786870320637
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,65535,1.655471960703532
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,65535,1.6705387433369954
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,65535,1.709536075592041
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,65535,1.7716107368469238
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,65535,2.1421759923299155
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,65535,1.8042826652526855
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,65535,1.6317440668741863
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,65535,1.663610617319743
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,65535,1.7055573463439941
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,65535,1.6501065889994304
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,65535,3.262629191080729
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,65535,3.2676852544148765
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,65535,2.2777387301127114
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,65535,3.2262932459513345
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,65535,3.184368133544922
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,65535,3.22161070505778
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,65535,2.8930772145589194
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,65535,2.225381374359131
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,65535,2.2670346895853677
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,65535,3.2375466028849282
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,65535,2.2344160079956055
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,65535,3.186944007873535
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,65535,2.2393013636271157
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,65535,3.185546557108561
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,65535,2.2413973808288574
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,65535,3.1827732721964517
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,65535,6.551279703776042
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,65535,4.41372807820638
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,65535,6.427466710408528
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,65535,6.307973225911458
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,65535,4.374287923177083
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,65535,4.423328081766765
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,65535,6.328789393107097
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,65535,6.35264523824056
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,65535,4.359701474507649
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,65535,4.400032043457031
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,131071,0.17633066574732462
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,131071,0.2211946646372477
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,131071,0.16858132680257162
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,131071,0.21963733434677124
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,131071,0.1685439944267273
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,131071,0.1733013391494751
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,131071,0.21737066904703775
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,131071,0.1715946594874064
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,131071,0.2251573403676351
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,131071,0.21628799041112265
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,131071,0.16554666558901468
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,131071,1.1704213619232178
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,131071,0.21202667554219565
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,131071,0.171834667523702
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,131071,0.7335626284281412
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,131071,0.20260266462961832
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,65535,6.329242706298828
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,131071,0.18283732732137045
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,131071,0.17825599511464438
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,131071,3.538933436075846
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,131071,3.5508960088094077
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,131071,0.1760586698849996
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,131071,0.16964799165725708
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,131071,3.517711957295736
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,131071,0.17856534322102866
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,131071,3.4897387822469077
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,131071,3.52346134185791
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,131071,0.1765600045522054
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,65535,4.367397308349609
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,131071,3.521434783935547
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,131071,0.1718613306681315
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,131071,3.498986562093099
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,131071,0.16674133141835532
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,131071,3.498863855997721
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,131071,0.2862933278083801
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,131071,0.2764906684557597
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,131071,3.5319201151529946
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,131071,0.27532267570495605
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,65535,6.296314875284831
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,131071,0.28309333324432373
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,65535,6.281589508056641
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,131071,3.492794672648112
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,131071,3.5064798990885415
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,131071,0.2693919936815898
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,131071,0.2730399966239929
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,131071,3.4620641072591147
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,65535,6.295167922973633
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,65535,4.34608523050944
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,131071,0.2813280026117961
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,131071,3.4907360076904297
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,131071,0.26926400264104206
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,131071,3.5327253341674805
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,131071,3.5199254353841147
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,131071,0.485370675722758
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,131071,0.4782559871673584
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,131071,3.478330612182617
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,131071,0.4916906754175822
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,131071,0.48608001073201496
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,131071,3.474186579386393
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,131071,3.5263360341389975
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,131071,0.4914026657740275
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,131071,3.4959306716918945
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,131071,0.46437867482503253
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,131071,3.528458595275879
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,131071,0.4647839864095052
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,131071,3.491994539896647
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,131071,0.4649653434753418
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,131071,3.4979947408040366
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,131071,3.4875574111938477
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,131071,3.4810187021891275
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,131071,0.9241013526916504
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,131071,0.8778026898701986
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,131071,5.193525314331055
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,131071,0.9003360271453857
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,131071,0.8876586755116781
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,131071,3.5051094690958657
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,131071,3.4912586212158203
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,131071,3.5438613891601562
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,131071,3.4813547134399414
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,131071,0.8893919785817465
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,131071,0.9300853411356608
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,131071,0.880400021870931
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,131071,0.8857440153757731
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,131071,3.4821065266927085
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,131071,3.488330523173014
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,131071,3.3672107060750327
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,131071,1.7922666867574055
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,131071,3.2223199208577475
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,131071,1.7423733075459797
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,131071,3.241194725036621
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,131071,1.7553706169128418
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,131071,3.289386749267578
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,131071,1.6966986656188965
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,131071,3.2141440709431968
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,131071,1.7047947247823079
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,131071,1.7193493843078613
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,131071,3.245936075846354
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,131071,3.258906682332357
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,131071,1.7057867050170898
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,131071,4.1681013107299805
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,131071,1.6988426844278972
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,131071,3.1766878763834634
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,131071,3.268490791320801
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,131071,3.239199956258138
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,131071,3.4099038441975913
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,131071,3.3803733189900718
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,131071,3.2513440450032554
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,131071,3.368058522542318
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,131071,3.1939732233683267
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,131071,3.341957410176595
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,131071,4.144997278849284
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,131071,3.96889591217041
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,131071,3.539370536804199
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,131071,3.167231877644857
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,131071,3.394709269205729
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,131071,3.3839680353800454
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,131071,3.188847859700521
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,131071,3.9692907333374023
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,131071,6.480223973592122
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,131071,6.3817494710286455
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,131071,6.278303782145183
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,131071,4.4616851806640625
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,131071,4.382757186889648
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,131071,4.43449052174886
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,131071,6.315999984741211
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,131071,4.408944129943848
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,131071,6.2598826090494795
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,131071,4.535653432210286
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,131071,6.303386688232422
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,131071,6.302090962727864
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,131071,4.461114565531413
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,131071,6.27996826171875
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,131071,4.390922546386719
VLLM,0.12.0,NVIDIA H200,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,131071,6.260725021362305
