framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,1,16,1,0,0.01531360000371933
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,1,16,2,0,0.020654399693012238
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,1,16,4,0,0.020585599541664123
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,1,16,8,0,0.020585599541664123
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,1,16,16,0,0.02017440050840378
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,1,16,32,0,0.019926400482654573
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,1,16,64,0,0.01958879977464676
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,1,32,1,0,0.01526080071926117
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,1,16,64,0,0.012956799566745758
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,1,16,4,0,0.014153599739074707
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,1,16,8,0,0.014115199446678162
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,1,16,16,0,0.013689599931240082
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,1,32,4,0,0.013963200151920319
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,1,16,2,0,0.014052799344062806
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,1,16,32,0,0.013121600449085235
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,1,32,2,0,0.015300799906253815
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,1,16,1,0,0.021267199516296388
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,1,32,8,0,0.014206400513648987
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,1,32,2,0,0.021062399446964263
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,1,32,16,0,0.014073599874973298
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,1,32,16,0,0.020729599893093108
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,1,32,32,0,0.013631999492645264
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,1,32,64,0,0.013152000308036805
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,1,32,1,0,0.022228799760341644
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,1,32,4,0,0.0205935999751091
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,1,32,32,0,0.0201664000749588
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,1,32,8,0,0.020815999805927278
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,1,64,1,0,0.015662400424480437
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,1,32,64,0,0.019939200580120088
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,1,64,2,0,0.01594880074262619
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,1,64,4,0,0.014287999272346497
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,1,64,8,0,0.014004799723625182
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,1,64,16,0,0.014193600416183472
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,1,64,32,0,0.013993600010871887
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,1,64,1,0,0.02332960069179535
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,1,64,64,0,0.013822400569915771
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,1,64,2,0,0.022609600424766542
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,1,64,4,0,0.02102559953927994
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,1,64,8,0,0.020899200439453126
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,1,64,16,0,0.02072319984436035
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,1,64,32,0,0.020750400424003602
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,1,64,64,0,0.02048960030078888
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,1,128,1,0,0.01823360025882721
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,1,128,64,0,0.014795200526714325
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,1,128,2,0,0.01783040016889572
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,1,128,4,0,0.017030400037765504
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,1,128,8,0,0.015936000645160674
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,1,128,16,0,0.015992000699043274
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,1,128,32,0,0.015744000673294067
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,1,128,1,0,0.025062400102615356
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,1,128,2,0,0.024532799422740937
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,1,128,4,0,0.02383359968662262
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,1,128,8,0,0.02319519966840744
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,1,128,16,0,0.02301120012998581
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,1,128,32,0,0.022752000391483305
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,1,128,64,0,0.021537600457668303
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,1,256,1,0,0.030825600028038025
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,1,256,2,0,0.021512000262737273
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,1,256,4,0,0.0204815998673439
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,1,256,8,0,0.01927199959754944
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,1,256,16,0,0.019406400620937347
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,1,256,32,0,0.019307200610637665
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,1,256,64,0,0.018196800351142885
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,1,256,1,0,0.04088320136070252
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,1,256,2,0,0.028428798913955687
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,1,256,4,0,0.027521601319313048
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,1,256,8,0,0.02743520140647888
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,1,256,16,0,0.02638239860534668
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,1,256,32,0,0.02621760070323944
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,1,256,64,0,0.02496960014104843
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,1,512,1,0,0.07117760181427002
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,1,512,2,0,0.03851040005683899
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,1,512,4,0,0.027508801221847533
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,1,512,8,0,0.036108800768852235
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,1,512,16,0,0.0288783997297287
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,1,512,32,0,0.024188800156116484
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,1,512,64,0,0.023559999465942384
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,1,512,1,0,0.08135679960250855
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,1,512,2,0,0.04988479912281037
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,1,512,4,0,0.035630398988723756
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,1,512,16,0,0.0367935985326767
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,1,512,8,0,0.04594239890575409
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,1,512,32,0,0.032364800572395325
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,1,512,64,0,0.030881598591804504
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,1,1024,1,0,0.17875839471817018
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,1,1024,2,0,0.09605439901351928
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,1,1024,4,0,0.052078402042388915
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,1,1024,8,0,0.04133279919624329
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,1,1024,16,0,0.04447839856147766
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,1,1024,32,0,0.032579201459884646
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,1,1024,64,0,0.028214401006698607
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,1,1024,1,0,0.189300799369812
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,1,1024,2,0,0.10576000213623046
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,1,1024,4,0,0.06325600147247315
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,1,1024,8,0,0.04959039986133575
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,1,1024,32,0,0.04285280108451843
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,1,1024,16,0,0.054127997159957884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,1,1024,64,0,0.03627200126647949
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,1,1536,1,0,0.33359038829803467
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,1,1536,2,0,0.17519359588623046
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,1,1536,4,0,0.09707840085029602
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,1,1536,8,0,0.05533760190010071
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,1,1536,32,0,0.04397920072078705
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,1,1536,16,0,0.06183519959449768
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,1,1536,64,0,0.03505280017852783
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,1,1536,1,0,0.3455696105957031
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,1,1536,2,0,0.18551679849624633
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,1,1536,64,0,0.044740799069404605
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,1,1536,4,0,0.10695040225982666
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,1,1536,8,0,0.06571360230445862
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,1,1536,16,0,0.07432799935340881
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,1,2048,8,0,0.07819039821624756
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,1,2048,16,0,0.06875200271606445
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,1,1536,32,0,0.05662879943847656
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,1,2048,2,0,0.27697598934173584
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,1,2048,4,0,0.14472960233688353
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,1,2048,1,0,0.5380943775177002
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,1,2048,32,0,0.05957440137863159
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,1,2048,64,0,0.04196160137653351
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,1,2048,2,0,0.2913151979446411
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,1,2048,1,0,0.5443344116210938
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,1,2048,4,0,0.15630240440368653
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,1,2048,8,0,0.0925055980682373
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,1,2048,16,0,0.0796671986579895
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,1,2048,32,0,0.07346879839897155
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,1,3072,4,0,0.28055360317230227
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,1,2048,64,0,0.05439839959144592
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,1,3072,2,0,0.5506432056427002
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,1,3072,8,0,0.1645807981491089
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,1,3072,16,0,0.09942560195922852
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,1,3072,32,0,0.08338879942893981
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,1,3072,64,0,0.0610368013381958
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,1,3072,1,0,1.058894443511963
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,1,3072,4,0,0.30027520656585693
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,1,3072,2,0,0.5596496105194092
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,1,3072,8,0,0.1770416021347046
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,1,3072,16,0,0.1128432035446167
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,1,3072,32,0,0.09711359739303589
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,1,3072,1,0,1.0733839988708496
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,1,3072,64,0,0.07688959836959838
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,1,4096,8,0,0.24683680534362792
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,1,4096,32,0,0.12201119661331176
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,1,4096,4,0,0.4774752140045166
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,1,4096,16,0,0.13539520502090455
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,1,4096,64,0,0.09166240096092224
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,1,4096,2,0,0.9214096069335938
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,1,4096,1,0,1.8009199142456054
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,1,4096,8,0,0.2638623952865601
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,1,4096,4,0,0.47587199211120607
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,1,4096,16,0,0.15138720273971557
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,1,4096,32,0,0.14045759439468383
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,1,4096,2,0,0.9395440101623536
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,1,4096,64,0,0.10963519811630248
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,1,4096,1,0,1.7809183120727539
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,1,6144,16,0,0.28973119258880614
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,1,6144,8,0,0.5079071998596192
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,1,6144,32,0,0.1836511969566345
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,1,6144,64,0,0.1307536005973816
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,1,6144,4,0,0.9818991661071778
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,1,6144,2,0,2.0028656005859373
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,1,6144,16,0,0.3112368106842041
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,1,6144,8,0,0.5166863918304443
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,1,6144,32,0,0.21221280097961426
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,1,6144,4,0,0.9973024368286133
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,1,6144,64,0,0.15236959457397461
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,1,6144,2,0,1.9565263748168946
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,1,8192,16,0,0.4630576133728027
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,1,8192,32,0,0.24700798988342285
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,1,8192,8,0,0.8726976394653321
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,1,6144,1,0,4.118465423583984
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,1,8192,64,0,0.2416304111480713
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,1,8192,4,0,1.7381919860839843
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,1,6144,1,0,4.026180648803711
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,1,8192,16,0,0.527729606628418
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,1,8192,8,0,0.8891360282897949
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,1,8192,32,0,0.28433120250701904
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,1,8192,64,0,0.2740080118179321
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,1,8192,2,0,3.615526580810547
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,1,8192,4,0,1.7220272064208983
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,1,10240,16,0,0.7140367984771728
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,1,8192,2,0,3.4272991180419923
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,1,10240,8,0,1.3765695571899415
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,1,10240,32,0,0.3972928047180176
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,1,10240,64,0,0.31809918880462645
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,1,10240,4,0,2.787335968017578
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,1,8192,1,0,7.504153442382813
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,1,10240,16,0,0.742409610748291
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,1,10240,8,0,1.437617588043213
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,1,8192,1,0,7.122230529785156
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,1,10240,32,0,0.4229231834411621
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,1,10240,4,0,2.7379072189331053
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,1,10240,64,0,0.3368016004562378
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,1,10240,2,0,5.739276885986328
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,1,12288,16,0,0.9763744354248047
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,1,10240,2,0,5.594297790527344
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,1,12288,8,0,1.9572399139404297
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,1,12288,32,0,0.5589407920837403
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,1,12288,64,0,0.37522718906402586
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,1,12288,4,0,3.895801544189453
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,1,10240,1,0,11.841989135742187
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,1,12288,8,0,2.074131202697754
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,1,12288,4,0,3.9722736358642576
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,1,10240,1,0,10.83815689086914
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,1,12288,16,0,1.073960018157959
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,1,12288,32,0,0.6255296230316162
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,1,12288,2,0,8.218547058105468
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,1,12288,64,0,0.3885663986206055
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,1,12288,2,0,7.868996429443359
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,1,16384,8,0,3.5108993530273436
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,1,16384,16,0,1.7250400543212892
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,1,16384,32,0,0.972652816772461
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,1,16384,64,0,0.5040624141693115
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,1,16384,4,0,7.128508758544922
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,1,12288,1,0,16.266561889648436
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,1,16384,16,0,1.6991743087768554
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,1,12288,1,0,15.603651428222657
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,1,16384,64,0,0.5295648097991943
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,1,16384,32,0,0.9410528182983399
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,2,16,1,0,0.016625599563121797
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,1,16384,8,0,3.400012969970703
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,2,16,4,0,0.0143312007188797
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,2,16,2,0,0.015761600434780122
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,2,16,8,0,0.014020800590515137
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,2,16,16,0,0.013817599415779114
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,2,16,64,0,0.01290079951286316
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,2,16,32,0,0.013715200126171112
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,2,16,1,0,0.022495999932289124
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,2,16,2,0,0.02110559940338135
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,2,16,4,0,0.02128479927778244
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,2,16,8,0,0.020771199464797975
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,2,16,16,0,0.01993280053138733
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,2,16,32,0,0.020257599651813507
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,2,16,64,0,0.019606399536132812
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,2,32,1,0,0.01518079936504364
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,2,32,2,0,0.015568000078201295
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,2,32,4,0,0.014297600090503692
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,2,32,8,0,0.013844799995422364
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,2,32,16,0,0.014289599657058717
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,2,32,32,0,0.013687999546527862
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,2,32,64,0,0.012873600423336028
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,2,32,1,0,0.023577600717544556
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,1,16384,4,0,6.933500671386719
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,1,16384,2,0,14.787638854980468
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,2,32,2,0,0.022881600260734557
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,2,32,16,0,0.02473759949207306
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,2,32,32,0,0.02272160053253174
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,2,32,4,0,0.020739200711250304
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,2,32,8,0,0.02115679979324341
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,2,32,64,0,0.019460800290107726
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,2,64,1,0,0.01770240068435669
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,2,64,2,0,0.017579199373722078
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,2,64,4,0,0.015695999562740325
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,2,64,8,0,0.014689600467681885
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,2,64,16,0,0.014446400105953217
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,2,64,32,0,0.014716799557209014
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,2,64,4,0,0.023203200101852416
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,2,64,64,0,0.01579360067844391
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,2,64,1,0,0.023824000358581544
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,2,64,32,0,0.020923200249671935
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,2,64,2,0,0.023148800432682037
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,2,64,8,0,0.021700799465179443
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,2,64,16,0,0.021107199788093566
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,2,64,64,0,0.020497600734233856
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,2,128,1,0,0.030211201310157774
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,2,128,2,0,0.01834080070257187
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,2,128,4,0,0.018166400492191315
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,2,128,8,0,0.01733600050210953
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,2,128,16,0,0.016152000427246092
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,2,128,32,0,0.015910400450229643
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,2,128,64,0,0.015478399395942689
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,2,128,1,0,0.03729600012302399
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,1,16384,2,0,14.84891357421875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,2,128,2,0,0.02598559856414795
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,2,128,4,0,0.02439360022544861
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,2,128,8,0,0.024611200392246246
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,2,128,16,0,0.024278399348258973
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,2,128,32,0,0.02328319996595383
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,2,128,64,0,0.02290239930152893
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,2,256,1,0,0.05809119939804077
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,2,256,2,0,0.03245440125465393
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,2,256,4,0,0.021916800737380983
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,2,256,8,0,0.0212351992726326
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,2,256,16,0,0.021342399716377258
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,2,256,32,0,0.020300799608230592
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,2,256,64,0,0.019057600200176238
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,2,256,1,0,0.06816800236701966
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,2,256,2,0,0.04297919869422913
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,2,256,4,0,0.029659199714660644
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,2,256,8,0,0.02884320020675659
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,2,256,16,0,0.028233599662780762
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,2,256,32,0,0.027534401416778563
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,2,256,64,0,0.02612000107765198
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,2,512,8,0,0.033790400624275206
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,2,512,1,0,0.12970880270004273
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,2,512,2,0,0.07180479764938355
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,2,512,4,0,0.04019519984722138
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,2,512,16,0,0.03889279961585999
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,2,512,32,0,0.030558401346206666
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,2,512,64,0,0.025593599677085875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,2,512,1,0,0.1402783989906311
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,2,512,2,0,0.08246240019798279
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,2,512,4,0,0.0514303982257843
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,2,512,8,0,0.0427839994430542
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,2,512,16,0,0.050355201959609984
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,2,512,32,0,0.03930239975452423
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,2,512,64,0,0.03412159979343414
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,2,1024,1,0,0.33901760578155515
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,2,1024,2,0,0.17814240455627442
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,2,1024,4,0,0.0961184024810791
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,2,1024,8,0,0.05444480180740356
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,2,1024,16,0,0.04966239929199219
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,2,1024,32,0,0.047774401307106015
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,2,1024,64,0,0.03611679971218109
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,2,1024,1,0,0.349449610710144
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,2,1024,2,0,0.19095519781112671
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,2,1024,4,0,0.10974880456924438
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,2,1024,8,0,0.06913120150566102
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,2,1024,16,0,0.06060799956321716
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,2,1024,32,0,0.0611519992351532
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,2,1024,64,0,0.04747360050678253
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,2,1536,2,0,0.33798561096191404
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,2,1536,4,0,0.17514560222625733
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,1,16384,1,0,30.04216613769531
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,2,1536,8,0,0.09955999851226807
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,2,1536,1,0,0.6545072078704834
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,2,1536,16,0,0.06755200028419495
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,2,1536,32,0,0.07320320010185241
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,2,1536,64,0,0.05014079809188843
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,2,1536,2,0,0.34928479194641116
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,2,1536,4,0,0.19147679805755616
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,2,1536,8,0,0.11414719820022583
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,2,1536,1,0,0.697379207611084
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,2,1536,16,0,0.08350239992141724
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,2,1536,32,0,0.08911839723587037
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,2,1536,64,0,0.06483200192451477
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,2,2048,4,0,0.29166080951690676
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,2,2048,16,0,0.08490399718284607
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,2,2048,2,0,0.5375807762145997
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,2,2048,8,0,0.14987679719924926
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,2,2048,32,0,0.07954400181770324
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,2,2048,64,0,0.06646879911422729
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,2,2048,1,0,1.0525055885314942
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,1,16384,1,0,28.53221435546875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,2,2048,4,0,0.29238080978393555
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,2,2048,2,0,0.5522992134094238
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,2,2048,8,0,0.16535520553588867
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,2,2048,16,0,0.10038880109786988
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,2,2048,1,0,1.0596847534179688
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,2,2048,32,0,0.09399359822273254
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,2,2048,64,0,0.08683680295944214
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,2,3072,8,0,0.2905119895935059
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,2,3072,16,0,0.16878559589385986
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,2,3072,4,0,0.5526576042175293
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,2,3072,32,0,0.11719039678573609
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,2,3072,64,0,0.10437599420547486
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,2,3072,2,0,1.0959936141967774
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,2,3072,8,0,0.3121488094329834
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,2,3072,16,0,0.18811359405517578
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,2,3072,4,0,0.5639599800109864
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,2,3072,32,0,0.13692159652709962
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,2,3072,1,0,2.17663688659668
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,2,3072,64,0,0.12881280183792115
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,2,3072,2,0,1.0918607711791992
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,2,3072,1,0,2.1197135925292967
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,2,4096,32,0,0.14994399547576903
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,2,4096,8,0,0.4869584083557129
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,2,4096,16,0,0.2795135974884033
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,2,4096,64,0,0.14448000192642213
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,2,4096,4,0,0.9285840034484864
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,2,4096,2,0,1.8256479263305665
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,2,4096,16,0,0.2808736085891724
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,2,4096,8,0,0.5116064071655273
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,2,4096,32,0,0.18361120223999022
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,2,4096,4,0,0.9352864265441895
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,2,4096,64,0,0.18846559524536133
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,2,4096,2,0,1.8326351165771484
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,2,6144,16,0,0.5921616077423095
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,2,4096,1,0,3.7478736877441405
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,2,6144,8,0,1.0277392387390136
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,2,6144,32,0,0.3232352018356323
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,2,6144,64,0,0.23538560867309571
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,2,6144,4,0,2.036465644836426
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,2,4096,1,0,3.719883346557617
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,2,6144,16,0,0.596123218536377
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,2,6144,8,0,1.055777645111084
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,2,6144,32,0,0.35049760341644287
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,2,6144,64,0,0.2553071975708008
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,2,6144,4,0,2.04715518951416
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,2,6144,2,0,4.279000091552734
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,2,8192,16,0,0.9640447616577148
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,2,6144,2,0,4.233335876464844
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,2,8192,8,0,1.8418960571289062
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,2,8192,32,0,0.49978079795837405
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,2,8192,64,0,0.2849231958389282
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,2,8192,4,0,3.613828659057617
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,2,6144,1,0,8.56863021850586
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,2,8192,8,0,1.8675487518310547
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,2,8192,16,0,0.9710127830505371
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,2,6144,1,0,8.172003173828125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,2,8192,64,0,0.3083456039428711
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,2,8192,32,0,0.5524208068847656
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,2,8192,4,0,3.6888607025146483
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,2,8192,2,0,7.563104248046875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,2,8192,2,0,7.184913635253906
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,2,10240,8,0,2.7857776641845704
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,2,10240,32,0,0.7518335819244385
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,2,10240,16,0,1.4054832458496094
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,2,10240,64,0,0.44013118743896484
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,2,10240,4,0,5.9158576965332035
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,2,8192,1,0,14.894314575195313
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,2,10240,16,0,1.4137696266174316
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,2,10240,8,0,2.686800003051758
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,2,8192,1,0,14.20341796875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,2,10240,64,0,0.4968751907348633
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,2,10240,32,0,0.7915056228637696
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,2,10240,2,0,11.682527923583985
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,2,10240,4,0,5.686713409423828
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,2,12288,16,0,2.099798393249512
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,2,12288,8,0,4.016059112548828
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,2,12288,32,0,1.1198991775512694
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,2,10240,2,0,11.494087982177735
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,2,12288,64,0,0.6070720195770264
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,2,12288,4,0,8.322747039794923
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,2,10240,1,0,23.724359130859376
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,2,12288,8,0,4.143830490112305
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,2,12288,32,0,1.1734880447387694
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,2,12288,4,0,8.547991943359374
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,2,12288,16,0,2.1954864501953124
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,2,12288,64,0,0.6816112041473389
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,2,12288,2,0,16.342449951171876
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,2,10240,1,0,22.581739807128905
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,2,16384,16,0,3.8049392700195312
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,2,12288,2,0,16.274871826171875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,2,16384,32,0,1.973099136352539
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,2,16384,8,0,7.161785888671875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,2,16384,64,0,0.9427023887634277
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,2,16384,4,0,14.968380737304688
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,2,12288,1,0,35.088531494140625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,2,16384,16,0,3.487081527709961
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,2,16384,8,0,7.1132560729980465
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,2,16384,4,0,14.324832153320312
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,2,12288,1,0,33.165899658203124
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,4,16,1,0,0.016814400255680085
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,4,16,4,0,0.01557600051164627
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,4,16,2,0,0.0159168004989624
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,4,16,8,0,0.015486399829387664
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,2,16384,64,0,1.008612823486328
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,4,16,16,0,0.014273600280284881
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,4,16,32,0,0.014561599493026734
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,4,16,64,0,0.013219200074672699
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,2,16384,32,0,1.7778928756713868
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,4,16,1,0,0.023174400627613067
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,4,16,2,0,0.024115200340747833
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,4,16,4,0,0.0213359996676445
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,4,16,8,0,0.021670399606227873
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,4,16,16,0,0.020286400616168977
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,4,16,32,0,0.01961439996957779
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,4,16,64,0,0.019838400185108185
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,4,32,1,0,0.0172447994351387
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,4,32,2,0,0.015620799362659454
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,4,32,4,0,0.015244799852371215
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,4,32,8,0,0.014239999651908874
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,4,32,16,0,0.014774399995803832
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,4,32,32,0,0.013729600608348847
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,4,32,64,0,0.013091200590133667
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,4,32,1,0,0.023695999383926393
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,4,32,2,0,0.023491199314594268
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,4,32,4,0,0.022307200729846953
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,4,32,8,0,0.02111999988555908
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,4,32,16,0,0.02078399956226349
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,4,32,32,0,0.02072640061378479
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,4,32,64,0,0.01987359970808029
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,4,64,1,0,0.02787359952926636
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,2,16384,2,0,29.133346557617188
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,4,64,2,0,0.017046399414539337
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,4,64,4,0,0.016705599427223206
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,4,64,32,0,0.01642560064792633
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,4,64,8,0,0.016353599727153778
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,4,64,16,0,0.014766399562358857
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,4,64,64,0,0.014596800506114959
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,4,64,1,0,0.040612798929214475
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,4,64,2,0,0.02531839907169342
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,4,64,4,0,0.02330880016088486
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,4,64,8,0,0.023416000604629516
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,4,64,16,0,0.021859200298786165
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,4,64,32,0,0.021740800142288207
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,4,64,64,0,0.021096000075340272
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,4,128,1,0,0.05220479965209961
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,4,128,2,0,0.02990399897098541
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,4,128,4,0,0.01910720020532608
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,4,128,8,0,0.018539200723171233
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,4,128,16,0,0.01796319931745529
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,4,128,32,0,0.01722559928894043
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,4,128,64,0,0.016542400419712066
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,4,128,1,0,0.0626800000667572
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,4,128,2,0,0.04103359878063202
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,4,128,4,0,0.026926401257514953
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,4,128,8,0,0.025519999861717223
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,4,128,16,0,0.02540160119533539
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,4,128,32,0,0.024638399481773376
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,4,128,64,0,0.02324160039424896
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,4,256,1,0,0.10249760150909423
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,4,256,2,0,0.05979040265083313
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,4,256,4,0,0.035955199599266054
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,4,256,8,0,0.023475199937820435
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,4,256,16,0,0.02274720072746277
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,4,256,32,0,0.022716799378395082
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,4,256,1,0,0.11448800563812256
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,4,256,64,0,0.021559999883174898
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,4,256,2,0,0.07090880274772644
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,4,256,4,0,0.04761599898338318
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,4,256,8,0,0.031860798597335815
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,4,256,16,0,0.030707201361656188
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,4,256,32,0,0.030321601033210754
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,4,256,64,0,0.028808000683784484
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,4,512,1,0,0.23785600662231446
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,4,512,2,0,0.12857919931411743
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,4,512,4,0,0.07213760018348694
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,4,512,8,0,0.05717920064926148
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,4,512,16,0,0.03777279853820801
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,4,512,32,0,0.04131680130958557
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,4,512,64,0,0.033687999844551085
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,4,512,1,0,0.2477871894836426
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,2,16384,2,0,29.039813232421874
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,4,512,2,0,0.14261599779129028
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,4,512,4,0,0.08597760200500489
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,4,512,8,0,0.07117919921875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,4,512,16,0,0.04783360064029694
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,4,512,64,0,0.045731198787689206
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,4,512,32,0,0.057918399572372437
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,4,1024,2,0,0.34488959312438966
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,4,1024,1,0,0.6512447834014893
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,4,1024,4,0,0.18013279438018798
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,4,1024,8,0,0.10159840583801269
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,4,1024,16,0,0.07970560193061829
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,4,1024,32,0,0.05608479976654053
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,4,1024,64,0,0.05507680177688599
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,4,1024,4,0,0.1950063943862915
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,4,1024,2,0,0.3590496063232422
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,4,1024,1,0,0.6663856029510498
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,4,1024,8,0,0.12017920017242431
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,4,1024,16,0,0.09793279767036438
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,4,1024,32,0,0.07187520265579224
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,4,1024,64,0,0.07236319780349731
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,4,1536,8,0,0.1813264012336731
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,4,1536,4,0,0.33367199897766114
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,4,1536,16,0,0.12330080270767212
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,4,1536,2,0,0.6674079895019531
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,4,1536,1,0,1.2845616340637207
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,4,1536,32,0,0.08858240246772767
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,4,1536,64,0,0.08578559756278992
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,4,1536,8,0,0.2064591884613037
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,4,1536,4,0,0.3738464117050171
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,4,1536,16,0,0.14360640048980713
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,4,1536,2,0,0.6886367797851562
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,4,1536,32,0,0.11265280246734619
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,4,1536,64,0,0.10808800458908081
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,4,1536,1,0,1.2860464096069335
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,4,2048,16,0,0.165011203289032
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,4,2048,8,0,0.28887200355529785
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,4,2048,4,0,0.5455423831939697
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,4,2048,32,0,0.13082879781723022
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,4,2048,2,0,1.0555376052856444
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,4,2048,64,0,0.09609439969062805
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,4,2048,8,0,0.3144128084182739
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,4,2048,1,0,2.175444793701172
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,4,2048,16,0,0.18953919410705566
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,4,2048,4,0,0.5671743869781494
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,4,2048,32,0,0.15594559907913208
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,4,2048,2,0,1.0862367630004883
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,4,2048,64,0,0.12611520290374756
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,4,2048,1,0,2.0933727264404296
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,4,3072,16,0,0.3249840021133423
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,4,3072,8,0,0.5732592105865478
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,4,3072,32,0,0.21149759292602538
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,4,3072,64,0,0.15756800174713134
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,4,3072,4,0,1.089857578277588
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,4,3072,2,0,2.2728288650512694
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,4,3072,8,0,0.6009632110595703
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,4,3072,4,0,1.1205327987670899
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,4,3072,16,0,0.3844304084777832
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,4,3072,32,0,0.2427072048187256
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,4,3072,64,0,0.188427197933197
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,4,3072,2,0,2.1987632751464843
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,4,3072,1,0,4.611452865600586
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,4,4096,8,0,0.9736528396606445
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,4,4096,16,0,0.5350272178649902
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,4,4096,4,0,1.9355567932128905
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,4,4096,32,0,0.29591200351715086
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,4,3072,1,0,4.428704071044922
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,4,4096,64,0,0.23815839290618895
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,4,4096,2,0,3.899576187133789
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,4,4096,8,0,0.9970047950744629
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,4,4096,4,0,1.8692495346069335
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,4,4096,32,0,0.3311647891998291
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,4,4096,16,0,0.5738768100738525
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,4,4096,64,0,0.2941440105438232
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,4,4096,2,0,3.8510929107666017
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,4,4096,1,0,7.884371185302735
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,4,6144,8,0,2.088313674926758
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,2,16384,1,0,59.813201904296875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,4,6144,16,0,1.0676303863525392
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,4,6144,4,0,4.306579208374023
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,4,6144,64,0,0.40293121337890625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,4,6144,32,0,0.6365968227386475
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,4,4096,1,0,7.825299072265625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,4,6144,8,0,2.128233528137207
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,4,6144,2,0,8.657816314697266
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,2,16384,1,0,56.8578125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,4,6144,16,0,1.1490079879760742
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,4,6144,4,0,4.116340637207031
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,4,6144,32,0,0.6744480133056641
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,4,6144,64,0,0.46332478523254395
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,4,8192,16,0,1.8347295761108398
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,4,6144,2,0,8.343287658691406
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,4,8192,32,0,1.0016752243041993
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,4,8192,8,0,3.670907211303711
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,4,8192,64,0,0.5355408191680908
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,4,6144,1,0,17.052589416503906
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,4,8192,4,0,7.792998504638672
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,4,8192,16,0,1.871401596069336
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,4,8192,8,0,3.6036350250244142
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,4,8192,32,0,1.0360896110534668
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,8,16,1,0,0.017180800437927246
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,4,6144,1,0,16.404400634765626
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,8,16,2,0,0.016260799765586854
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,4,8192,4,0,7.4067329406738285
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,4,8192,64,0,0.6131455898284912
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,8,16,8,0,0.014956800639629364
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,8,16,4,0,0.016996799409389494
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,8,16,16,0,0.015060800313949584
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,8,16,32,0,0.013444800674915314
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,8,16,64,0,0.013737599551677703
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,8,16,1,0,0.025228801369667053
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,8,16,2,0,0.02366719990968704
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,8,16,4,0,0.022303999960422517
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,8,16,8,0,0.02203039973974228
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,8,16,16,0,0.020689600706100465
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,8,16,32,0,0.020524799823760986
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,8,16,64,0,0.0196383997797966
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,8,32,1,0,0.02991519868373871
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,8,32,2,0,0.017131200432777403
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,8,32,4,0,0.016022400557994844
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,4,8192,2,0,15.002020263671875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,8,32,8,0,0.01586560010910034
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,8,32,16,0,0.01618559956550598
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,8,32,32,0,0.014319999516010285
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,8,32,64,0,0.013841600716114044
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,8,32,2,0,0.024644799530506134
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,8,32,1,0,0.04360640048980713
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,8,32,4,0,0.024255999922752382
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,8,32,8,0,0.023017600178718567
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,8,32,16,0,0.02176000028848648
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,8,64,1,0,0.05
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,8,32,32,0,0.02152319997549057
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,8,32,64,0,0.021620799601078034
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,8,64,2,0,0.0284496009349823
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,8,64,4,0,0.017590400576591492
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,8,64,8,0,0.017452800273895265
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,8,64,16,0,0.017099200189113616
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,8,64,32,0,0.016094399988651274
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,8,64,4,0,0.026684799790382387
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,8,64,64,0,0.015227200090885162
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,8,64,1,0,0.06101120114326477
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,8,64,2,0,0.03884479999542236
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,8,64,8,0,0.02473759949207306
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,4,8192,2,0,14.553849792480468
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,8,64,16,0,0.02455040067434311
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,8,64,32,0,0.02282879948616028
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,8,64,64,0,0.02236640006303787
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,8,128,1,0,0.09145920276641846
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,8,128,2,0,0.05361440181732178
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,8,128,4,0,0.037561601400375365
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,8,128,8,0,0.02030239999294281
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,8,128,16,0,0.020073600113391876
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,8,128,32,0,0.02001280039548874
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,8,128,64,0,0.019072000682353974
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,8,128,1,0,0.10138239860534667
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,8,128,2,0,0.06463680267333985
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,8,128,4,0,0.04461280107498169
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,8,128,8,0,0.02881760001182556
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,8,128,16,0,0.028040000796318056
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,8,128,32,0,0.027935999631881713
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,8,128,64,0,0.02574560046195984
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,8,256,1,0,0.18855999708175658
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,8,256,2,0,0.10487040281295776
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,8,256,4,0,0.06190559864044189
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,8,256,8,0,0.04073440134525299
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,8,256,16,0,0.026535999774932862
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,8,256,32,0,0.0264847993850708
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,8,256,64,0,0.025440001487731935
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,8,256,1,0,0.20049281120300294
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,8,256,2,0,0.11895999908447266
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,8,256,4,0,0.07466239929199218
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,8,256,8,0,0.05447999835014343
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,8,256,16,0,0.03780960142612457
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,8,256,32,0,0.036001598834991454
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,8,512,2,0,0.24708640575408936
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,8,256,64,0,0.03581759929656982
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,8,512,4,0,0.13412959575653077
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,8,512,1,0,0.4589104175567627
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,8,512,8,0,0.09680960178375245
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,8,512,16,0,0.06634719967842102
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,8,512,32,0,0.0440447986125946
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,8,512,64,0,0.04870080053806305
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,8,512,2,0,0.2620352029800415
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,8,512,4,0,0.15039360523223877
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,8,512,1,0,0.47555041313171387
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,8,512,8,0,0.11350719928741455
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,8,512,16,0,0.08325920104980469
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,8,512,32,0,0.06144160032272339
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,8,512,64,0,0.06660320162773133
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,8,1024,4,0,0.3496432065963745
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,8,1024,2,0,0.671395206451416
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,8,1024,8,0,0.19254239797592163
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,8,1024,16,0,0.13891199827194214
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,8,1024,32,0,0.09820320010185242
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,8,1024,1,0,1.2732640266418458
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,8,1024,64,0,0.07043520212173462
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,4,8192,1,0,31.820126342773438
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,8,1024,4,0,0.3729599952697754
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,8,1024,8,0,0.2170016050338745
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,8,1024,2,0,0.7002816200256348
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,8,1024,1,0,1.309598445892334
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,8,1024,16,0,0.16162079572677612
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,8,1024,32,0,0.13587039709091187
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,8,1024,64,0,0.09698719978332519
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,4,8192,1,0,29.329953002929688
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,8,1536,8,0,0.3528480052947998
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,8,1536,16,0,0.22897119522094728
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,8,1536,2,0,1.2872735977172851
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,8,1536,4,0,0.6591343879699707
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,8,1536,32,0,0.1559216022491455
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,8,1536,64,0,0.11898399591445923
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,8,1536,8,0,0.3832511901855469
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,8,1536,16,0,0.2628432035446167
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,8,1536,32,0,0.18736000061035157
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,8,1536,64,0,0.14948159456253052
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,8,1536,4,0,0.6823808193206787
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,8,1536,1,0,2.6000896453857423
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,8,1536,2,0,1.3365551948547363
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,8,2048,16,0,0.3197232007980347
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,8,2048,8,0,0.5590159893035889
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,8,2048,64,0,0.16729919910430907
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,8,2048,32,0,0.23116641044616698
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,8,2048,4,0,1.062980842590332
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,8,1536,1,0,2.5820079803466798
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,8,2048,16,0,0.3549904108047485
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,8,2048,2,0,2.1076879501342773
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,8,2048,8,0,0.6177040100097656
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,8,2048,32,0,0.27300798892974854
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,8,2048,64,0,0.2063999891281128
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,8,2048,4,0,1.0958656311035155
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,8,2048,2,0,2.135598373413086
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,8,3072,16,0,0.6120912075042725
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,8,3072,8,0,1.1318767547607422
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,8,3072,32,0,0.395249605178833
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,8,3072,64,0,0.27017760276794434
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,8,2048,1,0,4.411065673828125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,8,3072,4,0,2.2525056838989257
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,8,2048,1,0,4.330003356933593
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,8,3072,16,0,0.7188240051269531
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,8,3072,8,0,1.2271200180053712
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,8,3072,32,0,0.47577600479125975
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,8,3072,64,0,0.3389375925064087
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,8,3072,4,0,2.266294479370117
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,8,3072,2,0,4.632233428955078
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,8,4096,16,0,1.063371181488037
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,8,4096,8,0,2.0084047317504883
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,8,3072,2,0,4.474526214599609
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,8,4096,32,0,0.5975791931152343
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,8,4096,64,0,0.41499838829040525
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,8,4096,4,0,3.9448478698730467
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,8,3072,1,0,9.311798095703125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,8,4096,16,0,1.0757295608520507
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,8,3072,1,0,8.929755401611327
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,8,4096,8,0,2.017655944824219
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,8,4096,4,0,3.9123374938964846
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,16,16,1,0,0.029182401299476624
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,8,4096,32,0,0.6344031810760498
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,16,16,4,0,0.0164015993475914
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,16,16,2,0,0.018068799376487733
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,8,4096,64,0,0.47495360374450685
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,16,16,8,0,0.017059199512004852
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,16,16,16,0,0.01464959979057312
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,16,16,32,0,0.015399999916553497
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,8,4096,2,0,8.175443267822265
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,16,16,64,0,0.013775999844074249
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,16,16,1,0,0.040862399339675906
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,16,16,2,0,0.02454400062561035
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,16,16,4,0,0.02465759962797165
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,16,16,16,0,0.021206399798393248
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,16,16,8,0,0.023280000686645506
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,16,32,2,0,0.030136001110076905
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,16,16,32,0,0.02142080068588257
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,16,32,4,0,0.017604799568653108
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,16,16,64,0,0.021003200113773345
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,16,32,1,0,0.050697600841522215
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,16,32,8,0,0.016774399578571318
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,16,32,16,0,0.01720159947872162
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,16,32,32,0,0.01536799967288971
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,8,4096,2,0,7.694459533691406
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,16,32,64,0,0.01499360054731369
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,16,32,1,0,0.06195840239524841
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,16,32,2,0,0.038833600282669065
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,16,32,4,0,0.025300800800323486
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,16,32,8,0,0.02497279942035675
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,16,32,16,0,0.024297599494457246
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,16,32,32,0,0.02280000001192093
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,16,32,64,0,0.02192160040140152
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,16,64,1,0,0.0865343987941742
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,16,64,2,0,0.05158079862594604
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,16,64,4,0,0.030209600925445557
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,16,64,8,0,0.019439999759197236
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,16,64,16,0,0.018475200235843658
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,16,64,32,0,0.018665599822998046
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,16,64,64,0,0.017190399765968322
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,16,64,1,0,0.09867680072784424
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,16,64,2,0,0.06367520093917847
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,16,64,4,0,0.041403201222419736
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,16,64,8,0,0.027483201026916503
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,16,64,16,0,0.026305601000785828
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,16,128,1,0,0.16307040452957153
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,16,64,32,0,0.02656799852848053
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,16,64,64,0,0.025307199358940123
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,8,4096,1,0,15.698353576660157
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,16,128,2,0,0.09076160192489624
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,16,128,4,0,0.05602719783782959
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,16,128,8,0,0.03487679958343506
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,16,128,16,0,0.023387199640274046
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,16,128,2,0,0.10375039577484131
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,16,128,32,0,0.02337760031223297
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,16,128,8,0,0.0511680006980896
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,16,128,64,0,0.023100799322128295
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,16,128,1,0,0.17730560302734374
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,16,128,4,0,0.06977279782295227
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,16,128,16,0,0.0343423992395401
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,16,128,32,0,0.033055999875068666
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,16,128,64,0,0.03306399881839752
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,8,4096,1,0,14.98701171875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,16,256,2,0,0.19514080286026
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,16,256,1,0,0.3593775987625122
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,16,256,4,0,0.10804159641265869
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,16,256,8,0,0.06891679763793945
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,16,256,16,0,0.0485071986913681
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,16,256,32,0,0.03237600028514862
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,16,256,64,0,0.03238399922847748
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,16,256,2,0,0.21207358837127685
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,16,256,1,0,0.37951040267944336
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,16,256,4,0,0.12408000230789185
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,16,256,8,0,0.08556960225105285
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,16,256,16,0,0.06483039855957032
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,16,256,32,0,0.047838398814201356
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,16,256,64,0,0.04577600061893463
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,16,512,32,0,0.07913920283317566
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,16,512,4,0,0.24916958808898926
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,16,512,2,0,0.4767615795135498
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,16,512,8,0,0.17296799421310424
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,16,512,1,0,0.8922639846801758
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,16,512,4,0,0.27550559043884276
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,16,512,16,0,0.11029280424118042
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,16,512,64,0,0.05904480218887329
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,16,512,2,0,0.5038415908813476
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,16,512,8,0,0.19655200242996215
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,16,512,16,0,0.1363263964653015
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,16,512,1,0,0.9301520347595215
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,16,512,32,0,0.10510879755020142
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,16,512,64,0,0.08488320112228394
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,16,1024,8,0,0.36921119689941406
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,16,1024,64,0,0.12643359899520873
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,16,1024,16,0,0.25293281078338625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,16,1024,4,0,0.681608009338379
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,16,1024,32,0,0.16974079608917236
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,16,1024,2,0,1.3292896270751953
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,16,1024,8,0,0.4097599983215332
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,16,1024,16,0,0.2911056041717529
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,16,1024,32,0,0.2107840061187744
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,16,1024,4,0,0.7193727970123291
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,16,1024,2,0,1.3722335815429687
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,16,1024,64,0,0.16962720155715943
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,16,1024,1,0,2.6379247665405274
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,16,1024,1,0,2.5718399047851563
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,16,1536,8,0,0.6854479789733887
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,16,1536,64,0,0.20054080486297607
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,16,1536,16,0,0.43867039680480957
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,16,1536,4,0,1.309171199798584
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,16,1536,2,0,2.5969600677490234
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,16,1536,32,0,0.2775871992111206
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,16,1536,8,0,0.7543920040130615
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,16,1536,32,0,0.3613728046417236
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,16,1536,16,0,0.5112368106842041
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,16,1536,64,0,0.2603312015533447
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,16,1536,4,0,1.3429519653320312
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,16,1536,2,0,2.641046333312988
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,16,1536,1,0,5.263711929321289
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,16,2048,16,0,0.6147632122039794
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,16,2048,8,0,1.106339168548584
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,16,2048,32,0,0.4195119857788086
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,16,2048,64,0,0.295467209815979
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,16,2048,4,0,2.1476463317871093
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,16,1536,1,0,5.164187240600586
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,16,2048,8,0,1.204374408721924
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,16,2048,32,0,0.49532160758972166
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,16,2048,4,0,2.224937629699707
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,16,2048,16,0,0.7175983905792236
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,16,2048,2,0,4.474934387207031
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,32,16,1,0,0.05234559774398804
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,16,2048,64,0,0.3681807994842529
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,32,16,2,0,0.03152160048484802
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,32,16,4,0,0.01900320053100586
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,32,16,8,0,0.018108800053596497
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,32,16,16,0,0.017289599776268004
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,32,16,32,0,0.016308799386024475
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,32,16,64,0,0.01594720035791397
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,32,16,1,0,0.06274719834327698
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,32,16,2,0,0.04153920114040375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,16,2048,2,0,4.359952163696289
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,32,16,4,0,0.026635199785232544
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,32,16,8,0,0.02592960000038147
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,32,16,16,0,0.024648000299930573
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,32,16,32,0,0.023638400435447692
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,32,16,64,0,0.02314079999923706
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,32,32,1,0,0.08723840117454529
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,32,32,2,0,0.05198720097541809
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,32,32,4,0,0.030327999591827394
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,32,32,8,0,0.02011999934911728
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,32,32,16,0,0.01947679966688156
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,32,32,32,0,0.018873600661754607
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,16,2048,1,0,8.84883804321289
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,32,32,64,0,0.01751520037651062
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,32,32,1,0,0.10064159631729126
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,32,32,16,0,0.027115198969841003
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,32,32,2,0,0.06309599876403808
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,32,32,4,0,0.04407039880752563
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,32,32,8,0,0.028089600801467895
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,32,32,64,0,0.025390401482582092
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,32,32,32,0,0.02638559937477112
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,32,64,1,0,0.1591104030609131
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,32,64,2,0,0.08926240205764771
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,16,2048,1,0,8.864548492431641
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,32,64,4,0,0.05550720095634461
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,32,64,8,0,0.034620800614356996
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,32,64,16,0,0.023094399273395537
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,32,64,32,0,0.02237440049648285
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,32,64,64,0,0.022379200160503387
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,32,64,1,0,0.16998720169067383
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,32,64,2,0,0.10180799961090088
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,32,64,8,0,0.048931199312210086
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,32,64,4,0,0.07167199850082398
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,32,64,16,0,0.03396160006523132
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,32,64,32,0,0.0326335996389389
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,32,64,64,0,0.032569599151611325
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,32,128,1,0,0.31479198932647706
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,32,128,2,0,0.16796000003814698
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,32,128,1,0,0.32881600856781007
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,32,128,4,0,0.09876160025596618
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,32,128,8,0,0.06373440027236939
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,32,128,16,0,0.04356000125408173
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,32,128,32,0,0.030422401428222657
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,32,128,64,0,0.029820799827575684
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,32,128,2,0,0.1852336049079895
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,32,128,4,0,0.1166159987449646
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,32,128,8,0,0.081358402967453
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,32,128,16,0,0.060734397172927855
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,32,256,8,0,0.12167520523071289
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,32,128,32,0,0.046408000588417056
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,32,128,64,0,0.04324640035629272
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,32,256,2,0,0.38122239112854006
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,32,256,1,0,0.7021664142608642
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,32,256,4,0,0.1997375965118408
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,32,256,16,0,0.08210399746894836
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,32,256,32,0,0.06116480231285095
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,32,256,64,0,0.047091200947761536
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,32,256,4,0,0.22561919689178467
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,32,256,8,0,0.14599519968032837
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,32,256,2,0,0.4072112083435059
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,32,256,16,0,0.10804799795150757
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,32,256,1,0,0.7285952091217041
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,32,256,32,0,0.08627359867095948
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,32,256,64,0,0.0697376012802124
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,32,512,4,0,0.4918208122253418
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,32,512,8,0,0.33075039386749266
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,32,512,16,0,0.2034127950668335
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,32,512,2,0,0.9520015716552734
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,32,512,32,0,0.13958239555358887
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,32,512,64,0,0.1074336051940918
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,32,512,1,0,1.7679567337036133
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,32,512,4,0,0.5358672142028809
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,32,512,2,0,0.9885696411132813
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,32,512,8,0,0.36956000328063965
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,32,512,1,0,1.8060079574584962
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,32,512,16,0,0.24290239810943604
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,32,512,32,0,0.18323520421981812
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,32,512,64,0,0.15031520128250123
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,32,1024,8,0,0.7166704177856446
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,32,1024,16,0,0.4779952049255371
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,32,1024,32,0,0.30692479610443113
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,32,1024,64,0,0.22457919120788575
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,32,1024,4,0,1.354867172241211
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,32,1024,2,0,2.6815088272094725
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,32,1024,8,0,0.7922768115997314
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,32,1024,16,0,0.5547840118408203
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,32,1024,32,0,0.3859695911407471
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,32,1024,4,0,1.433894443511963
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,32,1024,64,0,0.30353920459747313
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,64,16,1,0,0.08931040167808532
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,64,16,2,0,0.05313119888305664
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,64,16,4,0,0.03424479961395264
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,64,16,8,0,0.02067999988794327
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,32,1024,2,0,2.718657684326172
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,64,16,16,0,0.01971839964389801
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,64,16,32,0,0.017731200158596038
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,64,16,64,0,0.018062399327754976
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,64,16,1,0,0.10002239942550659
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,32,1024,1,0,5.405510330200196
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,64,16,2,0,0.06540480256080627
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,64,16,4,0,0.043054398894309995
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,64,16,8,0,0.028622400760650635
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,64,16,16,0,0.02717759907245636
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,64,16,32,0,0.026238399744033813
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,64,16,64,0,0.02597759962081909
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,64,32,1,0,0.15792479515075683
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,32,1024,1,0,5.1324817657470705
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,64,32,2,0,0.09266560077667237
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,64,32,4,0,0.05620800256729126
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,64,32,8,0,0.03545919954776764
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,64,32,16,0,0.023756800591945647
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,64,32,32,0,0.022537599503993987
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,64,32,64,0,0.021027199923992157
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,64,32,2,0,0.10562720298767089
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,64,32,64,0,0.031411200761795044
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,64,32,1,0,0.17915680408477783
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,64,32,4,0,0.06967520117759704
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,64,32,8,0,0.049486398696899414
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,64,32,16,0,0.03447679877281189
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,64,32,32,0,0.032595199346542356
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,64,64,2,0,0.1625167965888977
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,64,64,1,0,0.30747840404510496
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,64,64,4,0,0.09715200066566468
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,64,64,8,0,0.06300640106201172
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,64,64,1,0,0.31039841175079347
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,64,64,16,0,0.042817598581314086
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,64,64,8,0,0.07996479868888855
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,64,64,32,0,0.03086720108985901
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,64,64,64,0,0.02943359911441803
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,64,64,2,0,0.17953599691390992
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,64,64,4,0,0.11487360000610351
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,64,64,16,0,0.060201597213745114
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,64,64,32,0,0.04551840126514435
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,64,64,64,0,0.0438511997461319
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,64,128,2,0,0.31718080043792723
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,64,128,64,0,0.04421280026435852
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,64,128,4,0,0.18290400505065918
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,64,128,1,0,0.6105648040771484
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,64,128,8,0,0.11300959587097167
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,64,128,16,0,0.0772383987903595
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,64,128,32,0,0.05719839930534363
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,64,128,4,0,0.20701758861541747
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,64,128,2,0,0.3437983989715576
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,64,128,1,0,0.6351776123046875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,64,128,8,0,0.13884639739990234
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,64,128,16,0,0.1025920033454895
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,64,128,32,0,0.0822704017162323
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,64,128,64,0,0.06755200028419495
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,64,256,4,0,0.3856415987014771
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,64,256,64,0,0.08431360125541687
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,64,256,2,0,0.7482992172241211
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,64,256,8,0,0.22777280807495118
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,64,256,16,0,0.15053600072860718
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,64,256,32,0,0.11177760362625122
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,64,256,8,0,0.26953439712524413
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,64,256,1,0,1.3821104049682618
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,64,256,16,0,0.19233440160751342
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,64,256,4,0,0.42691359519958494
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,64,256,32,0,0.15531519651412964
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,64,256,2,0,0.7928880214691162
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,64,256,64,0,0.12465599775314332
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,64,256,1,0,1.4311375617980957
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,64,512,16,0,0.3844496011734009
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,64,512,4,0,0.9633935928344727
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,64,512,64,0,0.1934048056602478
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,64,512,32,0,0.256822395324707
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,64,512,8,0,0.6322800159454346
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,64,512,2,0,1.8676959991455078
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,64,512,16,0,0.4609856128692627
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,64,512,32,0,0.33699679374694824
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,64,512,8,0,0.7132287979125976
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,64,512,64,0,0.2746079921722412
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,64,512,4,0,1.047424030303955
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,128,16,1,0,0.1584831953048706
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,128,16,2,0,0.09269440174102783
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,64,512,1,0,3.490670394897461
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,128,16,4,0,0.0572704017162323
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,128,16,8,0,0.03711999952793121
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,64,512,2,0,1.9656208038330079
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,128,16,16,0,0.024161599576473236
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,128,16,32,0,0.02264000028371811
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,128,16,64,0,0.022307200729846953
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,128,16,2,0,0.10519839525222778
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,128,16,1,0,0.1796239972114563
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,128,16,4,0,0.06952639818191528
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,128,16,8,0,0.0525056004524231
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,128,16,16,0,0.03590719997882843
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,64,512,1,0,3.557651138305664
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,128,16,32,0,0.03332479894161224
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,128,16,64,0,0.03238399922847748
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,128,32,1,0,0.29453120231628416
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,128,32,2,0,0.16344480514526366
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,128,32,4,0,0.09874719977378846
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,128,32,8,0,0.06335520148277282
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,128,32,16,0,0.044391998648643495
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,128,32,32,0,0.031465598940849306
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,128,32,64,0,0.028692799806594848
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,128,32,1,0,0.3109328031539917
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,128,32,2,0,0.1800927996635437
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,128,32,4,0,0.11467839479446411
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,128,32,8,0,0.08050879836082458
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,128,32,16,0,0.060417598485946654
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,128,32,32,0,0.046116799116134644
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,128,32,64,0,0.04320000112056732
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,128,64,2,0,0.3096287965774536
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,128,64,1,0,0.5772768020629883
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,128,64,4,0,0.17757920026779175
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,128,64,8,0,0.11213760375976563
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,128,64,16,0,0.07545599937438965
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,128,64,32,0,0.05611519813537598
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,128,64,64,0,0.0446943998336792
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,128,64,4,0,0.20248799324035643
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,128,64,2,0,0.33566880226135254
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,128,64,1,0,0.5981311798095703
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,128,64,8,0,0.13715840578079225
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,128,64,16,0,0.10126880407333375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,128,64,32,0,0.08181599974632263
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,128,128,4,0,0.3465471982955933
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,128,64,64,0,0.06982399821281433
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,128,128,8,0,0.21144640445709229
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,128,128,2,0,0.6202383995056152
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,128,128,16,0,0.14180959463119508
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,128,128,32,0,0.10696799755096435
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,128,128,1,0,1.2018671989440919
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,128,128,64,0,0.08050240278244018
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,128,128,4,0,0.39100160598754885
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,128,128,8,0,0.25329759120941164
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,128,128,16,0,0.1871664047241211
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,128,128,2,0,0.6644288063049316
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,128,128,1,0,1.24693603515625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,128,128,64,0,0.12098560333251954
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,128,128,32,0,0.14997440576553345
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,128,256,8,0,0.44038238525390627
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,128,256,16,0,0.2830496072769165
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,128,256,4,0,0.7550496101379395
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,128,256,32,0,0.2037440061569214
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,128,256,64,0,0.15540640354156493
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,128,256,2,0,1.4864527702331543
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,128,256,16,0,0.36124000549316404
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,128,256,8,0,0.5186399936676025
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,128,256,32,0,0.2858783960342407
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,128,256,4,0,0.8302000045776368
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,128,256,1,0,2.7378000259399413
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,128,256,64,0,0.2370975971221924
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,256,16,1,0,0.3000591993331909
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,256,16,2,0,0.16727520227432252
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,256,16,4,0,0.10051679611206055
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,256,16,64,0,0.03560959994792938
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,128,256,2,0,1.5638959884643555
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,256,16,8,0,0.06448799967765809
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,256,16,16,0,0.04418720006942749
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,256,16,32,0,0.036366400122642514
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,128,256,1,0,2.8172800064086916
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,256,16,1,0,0.31538240909576415
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,256,16,2,0,0.18332159519195557
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,256,16,4,0,0.11709439754486084
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,256,16,8,0,0.08241119980812073
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,256,16,16,0,0.06118239760398865
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,256,32,8,0,0.1140015959739685
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,256,16,32,0,0.049988800287246705
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,256,16,64,0,0.049630400538444516
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,256,32,2,0,0.31182401180267333
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,256,32,4,0,0.18021440505981445
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,256,32,1,0,0.5773519992828369
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,256,32,16,0,0.07734720110893249
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,256,32,32,0,0.05612800121307373
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,256,32,64,0,0.04973599910736084
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,256,32,2,0,0.33728001117706297
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,256,32,64,0,0.07608799934387207
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,256,32,4,0,0.2067023992538452
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,256,32,8,0,0.1387712001800537
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,256,32,1,0,0.6017392158508301
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,256,32,32,0,0.08082879781723022
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,256,32,16,0,0.10351040363311767
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,256,64,4,0,0.34264800548553465
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,256,64,8,0,0.20999999046325685
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,256,64,16,0,0.14318399429321288
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,256,64,2,0,0.6084928035736084
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,256,64,32,0,0.10560959577560425
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,256,64,64,0,0.08325120210647582
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,256,64,1,0,1.1389264106750487
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,256,64,8,0,0.25383040904998777
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,256,64,4,0,0.3855168104171753
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,256,64,16,0,0.18636959791183472
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,256,64,2,0,0.6523231983184814
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,256,64,32,0,0.14912960529327393
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,256,64,64,0,0.12713760137557983
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,256,64,1,0,1.1936176300048829
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,256,128,4,0,0.6739823818206787
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,256,128,16,0,0.26382400989532473
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,256,128,8,0,0.40170722007751464
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,256,128,32,0,0.1970576047897339
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,256,128,2,0,1.220571231842041
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,1,256,128,64,0,0.15355039834976197
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,256,128,1,0,2.3886255264282226
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,256,128,8,0,0.4846352100372314
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,256,128,4,0,0.7534848213195801
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,256,128,16,0,0.3460207939147949
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,256,128,32,0,0.27709920406341554
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,256,128,2,0,1.3023088455200196
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,1,16,1,0,0.01544319987297058
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,1,16,2,0,0.01510239988565445
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,1,256,128,64,0,0.24253599643707274
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,1,16,4,0,0.013825599849224091
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,1,16,8,0,0.013644799590110779
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,1,16,16,0,0.014107200503349304
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,1,16,32,0,0.01358720064163208
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,256,128,1,0,2.4701696395874024
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,1,16,64,0,0.01297599971294403
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,1,16,1,0,0.022655999660491942
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,1,16,2,0,0.020934399962425233
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,1,16,4,0,0.020380799472332
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,1,16,8,0,0.020532800257205962
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,1,16,16,0,0.02052319943904877
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,1,16,32,0,0.019889600574970245
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,1,16,64,0,0.019679999351501463
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,1,32,1,0,0.015585599839687348
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,1,32,2,0,0.015377600491046906
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,1,32,4,0,0.01404000073671341
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,1,32,8,0,0.014120000600814819
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,1,32,16,0,0.014020800590515137
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,1,32,32,0,0.014032000303268432
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,1,32,64,0,0.01313759982585907
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,1,32,1,0,0.023387199640274046
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,1,32,2,0,0.02268480062484741
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,1,32,4,0,0.020880000293254854
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,1,32,8,0,0.020795199275016784
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,1,32,16,0,0.020604799687862396
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,1,32,32,0,0.020764799416065217
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,1,32,64,0,0.020136000216007234
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,1,64,1,0,0.01655679941177368
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,1,64,2,0,0.01579679995775223
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,1,64,4,0,0.015411199629306793
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,1,64,8,0,0.014343999326229095
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,1,64,16,0,0.01419519931077957
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,1,64,32,0,0.014215999841690063
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,1,64,64,0,0.014008000493049622
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,1,64,1,0,0.024238400161266327
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,1,64,2,0,0.022470399737358093
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,1,64,4,0,0.022385600209236144
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,1,64,8,0,0.021191999316215515
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,1,64,16,0,0.020905600488185884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,1,64,32,0,0.02078399956226349
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,1,64,64,0,0.020820799469947814
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,1,128,1,0,0.03091680109500885
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,1,128,2,0,0.018111999332904815
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,1,128,4,0,0.017190399765968322
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,1,128,8,0,0.017057600617408752
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,1,128,16,0,0.015908800065517426
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,1,128,32,0,0.015943999588489532
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,1,128,64,0,0.01581439971923828
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,1,128,1,0,0.04138559997081757
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,1,128,2,0,0.02518880069255829
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,1,128,4,0,0.024822400510311128
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,1,128,8,0,0.02401600033044815
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,1,128,16,0,0.022987200319766997
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,1,128,32,0,0.02273920029401779
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,1,128,64,0,0.022603200376033784
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,1,256,1,0,0.058422398567199704
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,1,256,2,0,0.03197599947452545
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,1,256,4,0,0.021459199488162994
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,1,256,8,0,0.020868800580501556
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,1,256,16,0,0.020025600492954255
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,1,256,32,0,0.020193600654602052
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,1,256,64,0,0.0190528005361557
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,1,256,1,0,0.07042080163955688
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,1,256,2,0,0.04068000018596649
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,1,256,4,0,0.029128000140190125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,1,256,8,0,0.0274944007396698
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,1,256,16,0,0.02743679881095886
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,1,256,32,0,0.026256000995635985
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,1,256,64,0,0.02616479992866516
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,1,512,1,0,0.12694560289382933
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,1,512,2,0,0.07106879949569703
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,1,512,4,0,0.03824479877948761
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,1,512,8,0,0.027744001150131224
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,1,512,16,0,0.03651840090751648
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,1,512,32,0,0.029203200340270997
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,1,512,64,0,0.024223999679088594
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,1,512,1,0,0.13843040466308593
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,1,512,2,0,0.08158079981803894
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,1,512,4,0,0.04933759868144989
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,1,1024,1,0,0.33225278854370116
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,1,512,8,0,0.03521279990673065
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,1,512,16,0,0.04642559885978699
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,1,512,32,0,0.03690080046653747
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,1,512,64,0,0.032339200377464294
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,1,1024,32,0,0.043750399351119997
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,1,1024,64,0,0.03244799971580505
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,1,1024,2,0,0.17743200063705444
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,1,1024,4,0,0.09600639939308167
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,1,1024,8,0,0.051609599590301515
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,1,1024,16,0,0.041022399067878725
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,1,1024,4,0,0.10577759742736817
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,1,1024,2,0,0.188481605052948
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,1,1024,1,0,0.3441904067993164
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,1,1024,8,0,0.06380959749221801
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,1,1024,16,0,0.04979200065135956
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,1,1024,32,0,0.05584959983825684
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,1,1024,64,0,0.04173760116100311
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,1,1536,4,0,0.17544480562210082
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,1,1536,1,0,0.6218400001525879
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,1,1536,64,0,0.04378879964351654
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,1,1536,2,0,0.3322751998901367
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,1,1536,8,0,0.09689599871635438
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,1,1536,16,0,0.05602239966392517
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,1,1536,32,0,0.06184800267219544
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,1,1536,4,0,0.18498239517211915
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,1,1536,2,0,0.34361441135406495
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,1,1536,1,0,0.6382912158966064
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,1,1536,8,0,0.1079103946685791
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,1,1536,16,0,0.06663359999656678
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,1,1536,32,0,0.0737392008304596
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,1,1536,64,0,0.05609920024871826
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,1,2048,4,0,0.27797439098358157
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,1,2048,8,0,0.14430559873580934
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,1,2048,2,0,0.5334127902984619
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,1,2048,16,0,0.07852960228919983
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,1,2048,32,0,0.0686352014541626
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,1,2048,64,0,0.05979999899864197
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,1,2048,1,0,1.0092960357666017
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,1,2048,4,0,0.2846496105194092
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,1,2048,64,0,0.07340800166130065
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,1,2048,2,0,0.5409584045410156
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,1,2048,8,0,0.1588912010192871
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,1,2048,16,0,0.09138879776000977
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,1,2048,1,0,1.0119711875915527
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,1,2048,32,0,0.07974720001220703
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,1,3072,8,0,0.28307039737701417
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,1,3072,16,0,0.16480640172958375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,1,3072,32,0,0.09742559790611267
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,1,3072,4,0,0.5611504077911377
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,1,3072,64,0,0.08260160088539123
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,1,3072,2,0,1.0560416221618651
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,1,3072,8,0,0.2945136070251465
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,1,3072,16,0,0.1759119987487793
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,1,3072,4,0,0.5686031818389893
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,1,3072,32,0,0.11193599700927734
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,1,3072,1,0,2.153822326660156
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,1,3072,64,0,0.09847840070724487
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,1,3072,2,0,1.0889535903930665
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,1,3072,1,0,2.075081634521484
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,1,4096,8,0,0.46793761253356936
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,1,4096,16,0,0.26177918910980225
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,1,4096,4,0,0.9259920120239258
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,1,4096,64,0,0.12528159618377685
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,1,4096,32,0,0.1339632034301758
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,1,4096,2,0,1.8843791961669922
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,1,4096,16,0,0.266211199760437
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,1,4096,8,0,0.49912638664245607
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,1,4096,32,0,0.14832639694213867
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,1,4096,64,0,0.15774400234222413
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,1,4096,4,0,0.9466447830200195
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,1,4096,2,0,1.7948848724365234
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,1,4096,1,0,3.715462493896484
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,1,6144,16,0,0.5140543937683105
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,1,6144,32,0,0.2958751916885376
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,1,6144,64,0,0.2082848072052002
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,1,6144,8,0,1.0445775985717773
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,1,6144,4,0,1.999795150756836
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,1,4096,1,0,3.6705791473388674
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,1,6144,8,0,1.0998127937316895
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,1,6144,32,0,0.33154399394989015
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,1,6144,16,0,0.5802864074707031
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,1,6144,64,0,0.2243648052215576
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,1,6144,4,0,2.0605663299560546
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,1,6144,2,0,4.220169448852539
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,1,8192,16,0,0.9182880401611329
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,1,6144,2,0,4.179243087768555
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,1,8192,8,0,1.8023632049560547
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,1,8192,32,0,0.4936336040496826
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,1,8192,64,0,0.2509439945220947
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,1,8192,4,0,3.6548416137695314
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,1,6144,1,0,8.370649719238282
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,1,6144,1,0,8.060467529296876
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,1,8192,16,0,0.9132880210876465
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,1,8192,8,0,1.898124885559082
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,1,8192,64,0,0.2749216079711914
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,1,8192,4,0,3.501715087890625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,1,8192,32,0,0.477950382232666
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,1,8192,2,0,7.446047973632813
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,1,10240,16,0,1.382096004486084
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,1,10240,8,0,2.7846048355102537
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,1,8192,2,0,7.276841735839843
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,1,10240,32,0,0.7278463840484619
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,1,10240,64,0,0.43282241821289064
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,1,10240,4,0,5.716796875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,1,8192,1,0,14.797187805175781
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,1,8192,1,0,14.070249938964844
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,1,10240,8,0,2.65456485748291
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,1,10240,2,0,11.375961303710938
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,1,10240,16,0,1.3283535957336425
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,1,10240,4,0,5.467420959472657
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,1,10240,32,0,0.7465936183929444
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,1,10240,64,0,0.46416797637939455
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,1,12288,16,0,1.9699424743652343
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,1,10240,2,0,10.978566741943359
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,1,12288,64,0,0.5555535793304444
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,1,12288,8,0,3.8547439575195312
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,1,12288,32,0,1.1012255668640136
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,1,12288,4,0,8.295787048339843
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,1,10240,1,0,23.49090118408203
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,1,12288,4,0,7.994429016113282
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,1,12288,8,0,4.301798248291016
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,1,10240,1,0,22.257423400878906
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,1,12288,16,0,2.3099679946899414
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,1,12288,64,0,0.6455167770385742
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,1,12288,32,0,1.0174960136413573
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,1,12288,2,0,17.54740753173828
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,1,12288,2,0,16.858807373046876
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,1,16384,16,0,3.620113754272461
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,1,16384,8,0,7.443431854248047
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,1,16384,64,0,0.9196736335754394
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,1,16384,32,0,1.7398752212524413
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,1,16384,4,0,14.890330505371093
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,1,12288,1,0,34.57001647949219
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,1,16384,8,0,7.079273223876953
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,1,12288,1,0,32.33997497558594
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,1,16384,4,0,13.869535827636719
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,1,16384,64,0,0.9069408416748047
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,1,16384,16,0,3.442830276489258
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,2,16,1,0,0.016921600699424742
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,2,16,2,0,0.015591999888420105
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,2,16,4,0,0.015857599675655365
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,2,16,8,0,0.014480000734329224
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,1,16384,32,0,1.770737648010254
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,2,16,16,0,0.01451520025730133
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,2,16,32,0,0.014614400267601014
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,2,16,64,0,0.01358560025691986
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,2,16,1,0,0.02337760031223297
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,2,16,2,0,0.02274720072746277
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,2,16,4,0,0.021268799901008606
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,2,16,8,0,0.020769600570201874
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,2,16,16,0,0.0202224001288414
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,2,16,32,0,0.0201664000749588
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,2,16,64,0,0.019673599302768706
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,2,32,1,0,0.016395199298858642
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,2,32,2,0,0.01563359946012497
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,2,32,4,0,0.015668800473213194
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,2,32,8,0,0.014793600142002105
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,2,32,16,0,0.014248000085353851
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,2,32,32,0,0.013785600662231445
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,2,32,64,0,0.013278399407863618
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,2,32,1,0,0.02423679977655411
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,2,32,2,0,0.02285439968109131
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,2,32,4,0,0.02196960002183914
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,2,32,8,0,0.020584000647068022
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,2,32,16,0,0.021166400611400606
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,2,32,32,0,0.020692799985408784
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,2,32,64,0,0.019996799528598785
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,2,64,1,0,0.027480000257492067
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,2,64,2,0,0.016577599942684172
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,2,64,4,0,0.015803200006484986
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,2,64,8,0,0.015934400260448456
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,1,16384,2,0,29.92113037109375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,2,64,64,0,0.014399999380111694
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,2,64,16,0,0.014336000382900237
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,2,64,32,0,0.014361600577831268
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,2,64,1,0,0.03489919900894165
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,2,64,2,0,0.02666560113430023
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,2,64,4,0,0.023523199558258056
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,2,64,8,0,0.022416000068187714
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,2,64,16,0,0.02171359956264496
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,2,64,32,0,0.020764799416065217
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,2,64,64,0,0.021823999285697938
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,2,128,16,0,0.016982400417327882
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,2,128,4,0,0.01851679980754852
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,2,128,1,0,0.052744001150131226
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,2,128,8,0,0.017497600615024568
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,2,128,2,0,0.028731200098991393
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,2,128,32,0,0.016147199273109435
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,2,128,64,0,0.01650400012731552
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,2,128,1,0,0.06298879981040954
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,2,128,2,0,0.03859840035438537
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,2,128,16,0,0.024502399563789367
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,2,128,4,0,0.025867199897766112
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,2,128,8,0,0.02446240037679672
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,2,128,32,0,0.023871999979019166
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,2,128,64,0,0.02313600033521652
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,2,256,1,0,0.1030128002166748
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,2,256,2,0,0.058392000198364255
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,2,256,4,0,0.03255999982357025
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,2,256,8,0,0.022188800573349
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,2,256,16,0,0.02139520049095154
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,2,256,1,0,0.1120144009590149
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,2,256,32,0,0.02144159972667694
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,1,16384,2,0,28.43187255859375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,2,256,64,0,0.02044160068035126
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,2,256,2,0,0.06936479806900024
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,2,256,4,0,0.04297440052032471
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,2,256,8,0,0.030079999566078187
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,2,256,32,0,0.02843039929866791
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,2,256,16,0,0.03052319884300232
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,2,256,64,0,0.027859199047088622
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,2,512,1,0,0.2312688112258911
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,2,512,2,0,0.1290992021560669
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,2,512,4,0,0.07108799815177917
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,2,512,8,0,0.040249601006507874
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,2,512,16,0,0.034360000491142274
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,2,512,1,0,0.24433600902557373
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,2,512,32,0,0.03763999938964844
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,2,512,64,0,0.030769601464271545
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,2,512,2,0,0.14068000316619872
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,2,512,4,0,0.08130720257759094
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,2,512,8,0,0.05081279873847962
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,2,512,16,0,0.04214879870414734
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,2,512,32,0,0.05053439736366272
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,2,512,64,0,0.038529598712921144
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,2,1024,2,0,0.33771519660949706
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,2,1024,4,0,0.1775439977645874
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,2,1024,8,0,0.09627199769020081
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,2,1024,1,0,0.6353903770446777
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,2,1024,16,0,0.054788798093795776
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,2,1024,32,0,0.04964799880981445
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,2,1024,64,0,0.047577598690986635
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,2,1024,2,0,0.34632000923156736
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,2,1024,4,0,0.18924319744110107
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,2,1024,8,0,0.10822720527648926
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,2,1024,1,0,0.6522336006164551
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,2,1024,16,0,0.06819360256195069
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,2,1024,32,0,0.06020960211753845
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,2,1024,64,0,0.06045920252799988
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,2,1536,4,0,0.3367167949676514
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,2,1536,8,0,0.17379839420318605
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,2,1536,2,0,0.6449967861175537
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,2,1536,16,0,0.09884480237960816
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,2,1536,32,0,0.06723520159721375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,2,1536,64,0,0.07327200174331665
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,2,1536,1,0,1.2194704055786132
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,2,1536,16,0,0.11309759616851807
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,2,1536,8,0,0.19051040410995485
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,2,1536,4,0,0.34783520698547366
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,2,1536,32,0,0.08511840105056763
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,2,1536,2,0,0.6658112049102783
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,2,1536,64,0,0.08882079720497131
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,2,1536,1,0,1.2311023712158202
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,2,2048,8,0,0.2806416034698486
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,2,2048,16,0,0.14922239780426025
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,2,2048,4,0,0.5519375801086426
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,2,2048,32,0,0.08621439933776856
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,2,2048,2,0,1.058347225189209
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,2,2048,64,0,0.08045120239257812
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,2,2048,8,0,0.2945280075073242
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,2,2048,16,0,0.16850240230560304
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,2,2048,4,0,0.5528528213500976
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,2,2048,1,0,2.0540559768676756
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,2,2048,32,0,0.10210399627685547
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,2,2048,64,0,0.09592000246047974
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,2,2048,2,0,1.0751184463500976
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,2,2048,1,0,1.9833087921142578
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,2,3072,16,0,0.29543681144714357
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,2,3072,8,0,0.5589056015014648
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,2,3072,32,0,0.1681615948677063
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,2,3072,64,0,0.11782879829406738
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,2,3072,4,0,1.1126399993896485
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,2,3072,2,0,2.2408735275268556
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,2,3072,8,0,0.594972801208496
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,2,3072,4,0,1.1174240112304688
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,2,3072,16,0,0.33630239963531494
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,2,3072,32,0,0.19585280418395995
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,2,3072,64,0,0.13927359580993653
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,2,3072,2,0,2.2028175354003907
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,2,3072,1,0,4.491211318969727
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,2,4096,8,0,0.9587776184082031
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,2,4096,16,0,0.5241663932800293
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,2,4096,4,0,1.8601728439331056
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,2,4096,64,0,0.15445120334625245
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,2,4096,32,0,0.2642960071563721
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,2,3072,1,0,4.375892639160156
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,2,4096,2,0,3.90002555847168
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,2,4096,8,0,1.0298272132873536
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,2,4096,32,0,0.28147039413452146
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,2,4096,4,0,1.8563039779663086
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,2,4096,16,0,0.5805600166320801
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,2,4096,64,0,0.170033597946167
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,1,16384,1,0,58.86790161132812
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,2,4096,2,0,3.8020511627197267
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,2,4096,1,0,7.844750213623047
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,2,6144,16,0,1.0541600227355956
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,2,6144,64,0,0.333788800239563
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,2,6144,32,0,0.5681680202484131
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,2,6144,8,0,2.034651184082031
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,2,6144,4,0,4.283956909179688
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,2,4096,1,0,7.391719818115234
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,2,6144,8,0,2.1660320281982424
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,1,16384,1,0,58.126043701171874
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,2,6144,4,0,4.140659332275391
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,2,6144,16,0,1.0311920166015625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,2,6144,32,0,0.5794511795043945
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,2,6144,64,0,0.3564784049987793
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,2,6144,2,0,8.921449279785156
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,2,6144,2,0,8.302467346191406
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,2,8192,8,0,3.75810546875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,2,8192,16,0,1.788243293762207
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,2,8192,64,0,0.4845776081085205
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,2,8192,32,0,1.0176336288452148
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,2,8192,4,0,7.725657653808594
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,2,6144,1,0,16.780506896972657
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,2,8192,16,0,1.7740400314331055
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,2,6144,1,0,16.182745361328124
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,2,8192,32,0,0.931606388092041
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,2,8192,8,0,3.6860641479492187
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,2,8192,64,0,0.5339583873748779
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,2,8192,4,0,7.4167724609375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,2,8192,2,0,15.333982849121094
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,2,10240,16,0,2.9101696014404297
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,2,10240,8,0,6.337547302246094
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,2,8192,2,0,14.167379760742188
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,2,10240,32,0,1.4575599670410155
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,2,10240,64,0,0.8415072441101075
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,2,10240,4,0,12.616171264648438
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,2,8192,1,0,31.15853271484375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,2,10240,8,0,5.745841598510742
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,2,10240,32,0,1.4227215766906738
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,2,10240,4,0,11.831025695800781
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,2,10240,64,0,0.8045999526977539
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,2,10240,16,0,2.8237871170043944
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,2,8192,1,0,29.772903442382812
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,2,10240,2,0,23.405955505371093
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,2,12288,16,0,4.15186882019043
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,2,12288,32,0,2.1693872451782226
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,2,10240,2,0,23.369435119628907
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,2,12288,8,0,8.64211654663086
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,2,12288,64,0,1.0441280364990235
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,2,12288,4,0,16.73090515136719
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,2,10240,1,0,47.1254150390625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,2,12288,8,0,8.231249237060547
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,2,12288,4,0,15.972914123535157
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,2,12288,64,0,1.075592041015625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,2,12288,32,0,2.072760009765625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,2,12288,16,0,3.9477134704589845
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,2,10240,1,0,45.612249755859374
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,2,12288,2,0,33.8851318359375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,2,16384,16,0,7.401828765869141
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,2,12288,2,0,32.58063659667969
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,2,16384,64,0,1.8606639862060548
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,2,16384,32,0,3.5864513397216795
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,2,16384,8,0,14.893547058105469
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,2,16384,4,0,29.49754638671875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,2,12288,1,0,69.18558959960937
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,2,16384,8,0,14.322195434570313
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,2,12288,1,0,65.2445556640625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,2,16384,4,0,27.987728881835938
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,4,16,1,0,0.016961599886417388
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,2,16384,16,0,7.350254058837891
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,4,16,2,0,0.016284799575805663
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,4,16,4,0,0.017027199268341064
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,4,16,8,0,0.014974400401115417
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,4,16,16,0,0.014750400185585022
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,4,16,32,0,0.013729600608348847
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,4,16,64,0,0.013742400705814362
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,4,16,1,0,0.023977600038051605
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,4,16,2,0,0.024006399512290954
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,4,16,4,0,0.022419199347496033
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,2,16384,64,0,1.8329759597778321
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,4,16,8,0,0.021044799685478212
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,4,16,16,0,0.02066880017518997
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,4,16,32,0,0.0217616006731987
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,4,16,64,0,0.02011680006980896
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,4,32,1,0,0.02771199941635132
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,4,32,2,0,0.01763039976358414
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,4,32,4,0,0.015465599298477174
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,2,16384,32,0,3.5298320770263674
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,4,32,8,0,0.015447999536991119
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,4,32,16,0,0.01675360053777695
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,4,32,64,0,0.01451839953660965
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,4,32,32,0,0.014144000411033631
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,4,32,1,0,0.03528000116348266
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,4,32,2,0,0.024542400240898134
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,4,32,4,0,0.02399359941482544
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,4,32,8,0,0.0223471999168396
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,4,32,16,0,0.02139520049095154
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,4,32,32,0,0.020923200249671935
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,4,32,64,0,0.021249599754810333
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,4,64,1,0,0.048467200994491574
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,4,64,2,0,0.025415998697280884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,4,64,4,0,0.016752000153064727
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,4,64,8,0,0.01647839993238449
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,4,64,16,0,0.016067199409008026
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,4,64,32,0,0.01582240015268326
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,4,64,64,0,0.014388799667358398
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,4,64,1,0,0.058852797746658324
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,4,64,2,0,0.03663359880447388
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,4,64,4,0,0.024644799530506134
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,4,64,8,0,0.02409279942512512
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,4,64,16,0,0.023265600204467773
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,4,64,32,0,0.022894400358200073
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,4,64,64,0,0.021566399931907655
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,4,128,1,0,0.0896448016166687
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,4,128,2,0,0.052553600072860716
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,4,128,4,0,0.031481599807739256
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,4,128,8,0,0.018780800700187682
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,4,128,16,0,0.0183119997382164
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,4,128,32,0,0.018240000307559966
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,4,128,64,0,0.017427200078964235
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,4,128,1,0,0.10158720016479492
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,4,128,2,0,0.06374719738960266
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,4,128,4,0,0.040119999647140504
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,4,128,8,0,0.026348799467086792
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,4,128,16,0,0.025953599810600282
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,4,128,32,0,0.02539680004119873
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,4,128,64,0,0.024527999758720397
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,4,256,1,0,0.18364959955215454
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,4,256,2,0,0.10284639596939087
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,4,256,4,0,0.05964159965515137
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,4,256,8,0,0.03605920076370239
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,4,256,16,0,0.023449599742889404
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,4,256,32,0,0.022793599963188173
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,4,256,64,0,0.022732800245285033
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,2,16384,2,0,59.81864624023437
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,4,256,1,0,0.1941391944885254
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,4,256,2,0,0.11293120384216308
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,4,256,4,0,0.07085919976234437
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,4,256,16,0,0.031630399823188785
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,4,256,8,0,0.05151039958000183
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,4,256,32,0,0.030713599920272828
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,4,256,64,0,0.030780801177024843
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,4,512,1,0,0.44437599182128906
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,4,512,2,0,0.23728320598602295
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,4,512,4,0,0.1285823941230774
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,4,512,8,0,0.07339360117912293
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,4,512,16,0,0.057145601511001586
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,4,512,32,0,0.0373663991689682
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,4,512,64,0,0.04082559943199158
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,4,512,1,0,0.4561791896820068
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,4,512,2,0,0.2524751901626587
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,4,512,4,0,0.14057120084762573
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,4,512,8,0,0.0859615981578827
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,4,512,16,0,0.07148799896240235
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,4,512,32,0,0.04785920083522797
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,4,512,64,0,0.0545087993144989
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,4,1024,8,0,0.1807039976119995
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,4,1024,4,0,0.34126720428466795
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,4,1024,2,0,0.6438767910003662
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,4,1024,16,0,0.10132639408111573
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,4,1024,32,0,0.08097760081291198
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,4,1024,64,0,0.05582879781723023
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,4,1024,1,0,1.253659152984619
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,4,1024,4,0,0.3589168071746826
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,4,1024,8,0,0.19702080488204957
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,4,1024,2,0,0.6682720184326172
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,4,1024,16,0,0.11780480146408082
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,4,1024,32,0,0.09778559803962708
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,4,1024,1,0,1.2856399536132812
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,4,1024,64,0,0.07334240078926087
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,4,1536,16,0,0.18300800323486327
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,4,1536,8,0,0.33983678817749025
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,4,1536,4,0,0.6624720096588135
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,4,1536,32,0,0.12342400550842285
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,4,1536,2,0,1.2798128128051758
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,4,1536,64,0,0.09060959815979004
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,4,1536,1,0,2.4152271270751955
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,4,1536,8,0,0.3610032081604004
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,4,1536,4,0,0.6768991947174072
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,4,1536,2,0,1.2900256156921386
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,4,1536,16,0,0.21702079772949218
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,4,1536,32,0,0.14615039825439452
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,4,1536,64,0,0.11288479566574097
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,4,1536,1,0,2.514156723022461
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,4,2048,8,0,0.5418848037719727
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,4,2048,16,0,0.3076672077178955
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,4,2048,4,0,1.0755375862121581
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,4,2048,32,0,0.16340160369873047
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,4,2048,64,0,0.13825759887695313
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,4,2048,2,0,2.192945671081543
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,2,16384,2,0,57.30912475585937
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,4,2048,8,0,0.6007311820983887
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,4,2048,4,0,1.096720027923584
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,4,2048,32,0,0.19353120326995848
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,4,2048,16,0,0.3418512105941772
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,4,2048,2,0,2.1145423889160155
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,4,2048,1,0,4.255083084106445
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,4,2048,64,0,0.15707199573516845
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,4,3072,16,0,0.5821087837219239
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,4,2048,1,0,4.205148696899414
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,4,3072,8,0,1.1275471687316894
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,4,3072,32,0,0.31988000869750977
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,4,3072,64,0,0.22193119525909424
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,4,3072,4,0,2.2481311798095702
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,4,3072,8,0,1.1992464065551758
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,4,3072,2,0,4.8605297088623045
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,4,3072,4,0,2.263049507141113
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,4,3072,16,0,0.6187903881072998
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,4,3072,32,0,0.368884801864624
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,4,3072,64,0,0.2532991886138916
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,4,3072,2,0,4.438011169433594
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,4,3072,1,0,9.339141082763671
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,4,3072,1,0,8.650007629394532
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,4,4096,4,0,4.0805824279785154
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,4,4096,8,0,1.9473392486572265
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,4,4096,16,0,0.9586128234863281
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,4,4096,32,0,0.5568912029266357
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,4,4096,64,0,0.3237904071807861
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,4,4096,2,0,7.887516784667969
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,4,4096,16,0,0.9849103927612305
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,4,4096,8,0,1.928116798400879
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,4,4096,4,0,3.8900001525878904
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,4,4096,32,0,0.5481391906738281
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,4,4096,64,0,0.33011040687561033
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,4,4096,2,0,7.816558074951172
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,4,4096,1,0,15.587579345703125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,4,6144,16,0,2.048849678039551
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,4,6144,32,0,1.0706496238708496
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,4,6144,8,0,4.459147262573242
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,4,6144,4,0,8.826436614990234
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,4,6144,64,0,0.6540847778320312
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,4,4096,1,0,15.333232116699218
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,4,6144,8,0,4.454019165039062
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,4,6144,2,0,17.080894470214844
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,4,6144,4,0,8.529142761230469
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,4,6144,16,0,2.2254928588867187
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,4,6144,32,0,1.1249695777893067
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,4,6144,64,0,0.6838064193725586
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,2,16384,1,0,118.38983154296875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,4,6144,2,0,17.493960571289062
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,4,8192,16,0,3.896753692626953
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,4,8192,32,0,1.9756975173950195
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,4,8192,8,0,7.754808044433593
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,4,8192,64,0,1.0003664016723632
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,4,8192,4,0,15.62823486328125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,4,6144,1,0,35.688482666015624
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,2,16384,1,0,113.54859619140625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,4,8192,16,0,3.7522048950195312
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,4,6144,1,0,34.52925415039063
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,4,8192,8,0,7.299983978271484
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,8,16,1,0,0.030232000350952148
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,8,16,2,0,0.01764640063047409
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,8,16,4,0,0.016332800686359405
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,4,8192,32,0,1.8595903396606446
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,8,16,8,0,0.01627359986305237
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,4,8192,64,0,1.0465776443481445
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,8,16,16,0,0.01613440066576004
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,8,16,32,0,0.014363199472427368
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,8,16,64,0,0.013833600282669067
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,8,16,1,0,0.03945440053939819
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,8,16,2,0,0.02412319928407669
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,8,16,4,0,0.023284800350666046
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,8,16,8,0,0.023014399409294128
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,8,16,16,0,0.02125120013952255
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,8,16,32,0,0.02134400010108948
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,8,16,64,0,0.020545600354671477
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,8,32,1,0,0.049239999055862425
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,8,32,2,0,0.02876960039138794
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,8,32,4,0,0.01751679927110672
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,8,32,8,0,0.015967999398708344
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,8,32,16,0,0.0160303995013237
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,8,32,32,0,0.015244799852371215
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,8,32,64,0,0.0141744002699852
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,8,32,1,0,0.05985760092735291
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,8,32,2,0,0.037518399953842166
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,8,32,4,0,0.024128000438213348
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,8,32,8,0,0.02316800057888031
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,8,32,16,0,0.023596799373626708
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,8,32,32,0,0.02183839976787567
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,8,32,64,0,0.02104319930076599
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,8,64,1,0,0.08611840009689331
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,8,64,2,0,0.050012797117233276
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,8,64,4,0,0.02699039876461029
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,8,64,8,0,0.017995199561119078
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,8,64,16,0,0.017166399955749513
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,8,64,32,0,0.01671999990940094
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,8,64,64,0,0.016011199355125426
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,4,8192,4,0,14.413984680175782
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,4,8192,2,0,31.10930480957031
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,8,64,1,0,0.09657599925994872
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,8,64,2,0,0.06061760187149048
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,8,64,4,0,0.038859200477600095
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,8,64,8,0,0.026447999477386474
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,8,64,16,0,0.026774400472640993
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,8,64,32,0,0.024273599684238433
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,8,64,64,0,0.0232464000582695
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,8,128,1,0,0.1611407995223999
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,8,128,2,0,0.09093279838562011
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,8,128,4,0,0.054467201232910156
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,8,128,8,0,0.03157599866390228
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,8,128,16,0,0.020099200308322906
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,8,128,32,0,0.02001439929008484
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,8,128,64,0,0.01976960003376007
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,8,128,1,0,0.17072000503540039
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,8,128,2,0,0.10160319805145264
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,8,128,4,0,0.06488320231437683
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,8,128,8,0,0.044100800156593324
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,8,128,16,0,0.02815040051937103
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,8,128,32,0,0.02772960066795349
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,8,256,1,0,0.34355199337005615
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,8,128,64,0,0.02757599949836731
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,8,256,2,0,0.18907999992370605
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,8,256,4,0,0.10281120538711548
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,8,256,8,0,0.061264002323150636
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,8,256,16,0,0.04035199880599975
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,8,256,32,0,0.026203200221061707
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,8,256,64,0,0.026583999395370483
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,8,256,1,0,0.35795519351959226
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,8,256,2,0,0.19939839839935303
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,8,256,4,0,0.11874560117721558
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,8,256,8,0,0.0740880012512207
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,8,256,16,0,0.05442399978637695
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,8,256,32,0,0.037396800518035886
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,8,256,64,0,0.03633280098438263
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,8,512,4,0,0.24580800533294678
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,8,512,8,0,0.13257440328598022
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,8,512,2,0,0.45382080078125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,8,512,1,0,0.8575263977050781
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,8,512,16,0,0.09628000259399414
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,8,512,32,0,0.06595360040664673
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,8,512,64,0,0.04357599914073944
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,8,512,4,0,0.26320319175720214
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,8,512,2,0,0.47355360984802247
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,8,512,8,0,0.14901920557022094
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,8,512,1,0,0.876961612701416
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,8,512,16,0,0.11384479999542237
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,8,512,32,0,0.08387200236320495
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,8,512,64,0,0.05920320153236389
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,8,1024,8,0,0.34701440334320066
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,8,1024,4,0,0.6711631774902344
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,8,1024,16,0,0.19132640361785888
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,8,1024,32,0,0.13871840238571168
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,8,1024,2,0,1.2876720428466797
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,8,1024,64,0,0.09701759815216064
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,8,1024,1,0,2.5122623443603516
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,8,1024,8,0,0.3691663980484009
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,8,1024,4,0,0.7004208087921142
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,8,1024,32,0,0.16168479919433593
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,8,1024,16,0,0.21752479076385497
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,8,1024,64,0,0.12370719909667968
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,8,1024,2,0,1.3257712364196776
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,8,1024,1,0,2.520953559875488
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,8,1536,8,0,0.6646687984466553
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,8,1536,4,0,1.3005807876586915
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,8,1536,32,0,0.23173921108245848
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,8,1536,16,0,0.36799519062042235
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,8,1536,64,0,0.15737760066986084
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,4,8192,2,0,29.846621704101562
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,8,1536,2,0,2.5582624435424806
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,8,1536,16,0,0.40407681465148926
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,8,1536,8,0,0.7242784023284912
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,8,1536,4,0,1.3316831588745117
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,8,1536,32,0,0.27056159973144533
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,8,1536,64,0,0.19365439414978028
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,8,1536,2,0,2.600057601928711
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,8,1536,1,0,5.236217498779297
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,8,2048,16,0,0.6300784111022949
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,8,2048,8,0,1.0816543579101563
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,8,2048,32,0,0.33236639499664306
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,8,2048,4,0,2.224582481384277
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,8,2048,64,0,0.2362287998199463
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,8,1536,1,0,4.931419372558594
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,8,2048,8,0,1.0887855529785155
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,8,2048,2,0,4.300336074829102
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,8,2048,4,0,2.178348731994629
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,8,2048,32,0,0.38272318840026853
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,8,2048,16,0,0.5943471908569335
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,8,2048,64,0,0.26962080001831057
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,8,2048,2,0,4.283752059936523
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,8,2048,1,0,8.691558074951171
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,8,3072,8,0,2.3208751678466797
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,8,3072,16,0,1.1486111640930177
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,8,3072,4,0,4.677056121826172
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,8,3072,64,0,0.4059120178222656
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,8,3072,32,0,0.6817872047424316
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,8,2048,1,0,8.380494689941406
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,4,8192,1,0,60.68410034179688
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,8,3072,8,0,2.2400432586669923
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,8,3072,16,0,1.285001564025879
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,8,3072,2,0,9.406230163574218
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,8,3072,32,0,0.7073215961456298
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,8,3072,64,0,0.4492176055908203
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,8,3072,4,0,4.609998321533203
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,8,3072,2,0,9.114582061767578
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,8,4096,8,0,3.9502094268798826
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,8,4096,32,0,1.054531192779541
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,8,4096,16,0,1.9433712005615233
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,8,4096,64,0,0.5756383895874023
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,8,3072,1,0,18.14460906982422
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,8,4096,4,0,8.328008270263672
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,4,8192,1,0,58.4873291015625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,8,4096,16,0,1.9535039901733398
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,8,4096,8,0,3.9509166717529296
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,8,3072,1,0,17.848973083496094
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,16,16,1,0,0.05497440099716187
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,8,4096,32,0,1.0712240219116211
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,8,4096,64,0,0.6565199851989746
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,16,16,2,0,0.028486400842666626
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,16,16,4,0,0.017102399468421937
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,16,16,8,0,0.016363200545310975
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,16,16,16,0,0.016521599888801575
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,16,16,32,0,0.014446400105953217
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,16,16,64,0,0.014259199798107147
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,16,16,1,0,0.06042400002479553
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,16,16,2,0,0.03700799942016601
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,16,16,4,0,0.02420320063829422
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,16,16,8,0,0.024120000004768372
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,16,16,16,0,0.022991999983787537
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,16,16,32,0,0.021587200462818146
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,16,16,64,0,0.02075839936733246
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,16,32,1,0,0.08519039750099182
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,16,32,2,0,0.04983200132846832
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,8,4096,2,0,17.07671203613281
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,8,4096,4,0,8.15888671875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,16,32,4,0,0.027616000175476073
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,16,32,8,0,0.01746399998664856
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,16,32,16,0,0.02001439929008484
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,16,32,32,0,0.018171200156211854
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,16,32,64,0,0.015328000485897064
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,16,32,1,0,0.09566239714622497
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,16,32,2,0,0.06464639902114869
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,16,32,4,0,0.04185119867324829
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,16,32,8,0,0.025436800718307496
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,16,32,16,0,0.024695999920368195
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,16,32,32,0,0.024849599599838255
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,16,32,64,0,0.023369599878787995
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,16,64,1,0,0.152457594871521
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,16,64,2,0,0.08608959913253784
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,16,64,4,0,0.0520576000213623
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,16,64,8,0,0.029684799909591674
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,16,64,16,0,0.019223999977111817
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,16,64,32,0,0.018615999817848207
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,16,64,1,0,0.1626320004463196
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,16,64,2,0,0.09721279740333558
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,16,64,64,0,0.01860959976911545
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,16,64,4,0,0.06327999830245971
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,16,64,8,0,0.04154880046844482
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,16,64,16,0,0.02821280062198639
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,16,64,32,0,0.02643679976463318
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,16,64,64,0,0.026420798897743226
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,16,128,1,0,0.3013375997543335
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,16,128,2,0,0.1626320004463196
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,16,128,4,0,0.0917792022228241
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,16,128,8,0,0.056852799654006955
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,16,128,16,0,0.03600800037384033
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,16,128,32,0,0.023214399814605713
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,16,128,64,0,0.023203200101852416
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,8,4096,2,0,15.920819091796876
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,16,128,1,0,0.3152208089828491
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,16,128,2,0,0.1764623999595642
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,16,128,4,0,0.1048975944519043
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,16,128,8,0,0.06947519779205322
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,16,128,16,0,0.05114079713821411
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,16,128,32,0,0.03698880076408386
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,16,128,64,0,0.03337759971618652
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,16,256,2,0,0.3584975957870483
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,16,256,4,0,0.19615999460220337
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,16,256,32,0,0.04828960001468659
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,16,256,8,0,0.10882560014724732
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,16,256,1,0,0.6687280178070069
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,16,256,16,0,0.0693455994129181
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,16,256,64,0,0.0328031986951828
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,16,256,4,0,0.21591041088104249
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,16,256,2,0,0.37511041164398196
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,16,256,8,0,0.12335360050201416
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,16,256,1,0,0.6873151779174804
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,16,256,16,0,0.08545439839363098
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,16,256,32,0,0.06526079773902893
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,16,256,64,0,0.04963679909706116
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,16,512,8,0,0.25182878971099854
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,16,512,16,0,0.17415519952774047
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,16,512,4,0,0.4771984100341797
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,16,512,32,0,0.1117408037185669
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,16,512,2,0,0.8862079620361328
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,16,512,64,0,0.07928640246391297
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,16,512,1,0,1.7012479782104493
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,16,512,8,0,0.27558720111846924
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,16,512,4,0,0.5062975883483887
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,16,512,16,0,0.19663360118865966
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,16,512,2,0,0.9185983657836914
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,16,512,64,0,0.10438239574432373
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,16,512,32,0,0.13513920307159424
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,16,512,1,0,1.7277599334716798
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,16,1024,16,0,0.3654207944869995
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,16,1024,8,0,0.6780992031097413
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,16,1024,32,0,0.25277440547943114
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,16,1024,64,0,0.16672799587249756
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,16,1024,4,0,1.3591440200805665
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,16,1024,2,0,2.637363243103027
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,8,4096,1,0,32.92713623046875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,16,1024,8,0,0.7376880168914794
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,16,1024,4,0,1.3646608352661134
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,16,1024,16,0,0.4360047817230225
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,16,1024,64,0,0.2122720003128052
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,16,1024,32,0,0.31854400634765623
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,16,1024,2,0,2.6101280212402345
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,16,1024,1,0,5.148571014404297
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,16,1536,16,0,0.6873904228210449
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,16,1536,8,0,1.3052528381347657
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,16,1536,64,0,0.2767535924911499
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,16,1536,32,0,0.4428880214691162
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,16,1024,1,0,5.139847946166992
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,16,1536,4,0,2.6768863677978514
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,16,1536,8,0,1.4200639724731445
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,8,4096,1,0,31.069259643554688
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,16,1536,2,0,5.430879974365235
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,16,1536,16,0,0.7605279922485352
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,16,1536,4,0,2.616713523864746
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,16,1536,64,0,0.35139200687408445
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,16,1536,32,0,0.5084432125091553
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,16,1536,2,0,5.230815887451172
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,16,2048,8,0,2.1295055389404296
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,16,2048,16,0,1.1029664039611817
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,16,2048,32,0,0.6393936157226563
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,16,2048,64,0,0.41367201805114745
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,16,1536,1,0,10.580918121337891
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,16,2048,4,0,4.507843017578125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,16,2048,16,0,1.22041597366333
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,16,1536,1,0,10.236787414550781
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,16,2048,8,0,2.1690576553344725
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,32,16,1,0,0.0896511971950531
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,16,2048,32,0,0.6828239917755127
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,32,16,2,0,0.05027520060539246
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,16,2048,64,0,0.4888336181640625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,32,16,4,0,0.030814400315284728
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,32,16,8,0,0.018174399435520173
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,32,16,16,0,0.017548799514770508
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,32,16,32,0,0.016771200299263
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,16,2048,4,0,4.345991897583008
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,32,16,64,0,0.015638400614261628
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,16,2048,2,0,8.844258880615234
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,32,16,1,0,0.09634400010108948
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,32,16,2,0,0.06138719916343689
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,32,16,4,0,0.04420959949493408
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,32,16,8,0,0.025911998748779298
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,32,16,32,0,0.024659200012683867
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,32,16,16,0,0.025400000810623168
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,32,16,64,0,0.022920000553131103
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,32,32,1,0,0.15377119779586793
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,32,32,2,0,0.0859824001789093
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,32,32,4,0,0.05277119874954224
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,32,32,8,0,0.03223679959774017
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,32,32,16,0,0.01964000016450882
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,32,32,32,0,0.019262400269508363
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,32,32,64,0,0.01849759966135025
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,32,32,1,0,0.1654752016067505
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,32,32,2,0,0.09865760207176208
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,32,32,4,0,0.06425920128822327
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,32,32,8,0,0.0423007994890213
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,32,32,16,0,0.027822399139404298
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,32,32,32,0,0.027369600534439088
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,32,32,64,0,0.02640480101108551
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,16,2048,2,0,8.667120361328125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,32,64,1,0,0.2897984027862549
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,32,64,2,0,0.15443040132522584
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,32,64,4,0,0.09003199934959412
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,32,64,8,0,0.055644798278808597
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,32,64,16,0,0.034790399670600894
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,32,64,32,0,0.02438880056142807
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,32,64,64,0,0.02250239998102188
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,32,64,2,0,0.1672287940979004
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,32,64,1,0,0.3014319896697998
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,32,64,4,0,0.10377119779586792
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,32,64,8,0,0.06883040070533752
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,32,64,16,0,0.04915040135383606
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,32,64,32,0,0.03410399854183197
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,32,64,64,0,0.032278400659561154
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,32,128,2,0,0.3134511947631836
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,32,128,4,0,0.16658560037612916
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,32,128,8,0,0.09839360117912292
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,32,128,1,0,0.5883056163787842
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,32,128,16,0,0.06425439715385436
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,32,128,32,0,0.04383200109004974
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,32,128,64,0,0.030395200848579405
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,32,128,2,0,0.3279616117477417
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,32,128,4,0,0.18304799795150756
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,32,128,32,0,0.06124640107154846
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,32,128,8,0,0.11635520458221435
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,32,128,1,0,0.6042255878448486
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,32,128,16,0,0.08147519826889038
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,32,128,64,0,0.04585599899291992
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,16,2048,1,0,17.767437744140626
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,32,256,8,0,0.19928319454193116
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,32,256,2,0,0.7042304039001465
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,32,256,16,0,0.12107360363006592
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,32,256,4,0,0.3812495946884155
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,32,256,32,0,0.08141120076179505
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,32,256,64,0,0.06207039952278137
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,32,256,1,0,1.319651222229004
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,32,256,8,0,0.2251039981842041
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,32,256,16,0,0.14523680210113527
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,32,256,4,0,0.40613760948181155
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,32,256,2,0,0.7294608116149902
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,32,256,32,0,0.10788480043411255
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,32,256,64,0,0.08672000169754028
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,32,256,1,0,1.3590815544128418
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,32,512,8,0,0.4945807933807373
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,16,2048,1,0,16.686524963378908
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,32,512,32,0,0.20142560005187987
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,32,512,16,0,0.3261087894439697
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,32,512,64,0,0.1399824023246765
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,32,512,4,0,0.9523551940917969
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,32,512,2,0,1.7555328369140626
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,32,512,32,0,0.24732959270477295
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,32,512,16,0,0.368942403793335
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,32,512,8,0,0.5389776229858398
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,32,512,4,0,0.9788687705993653
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,32,512,64,0,0.18467040061950685
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,32,512,1,0,3.4359073638916016
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,32,512,2,0,1.8352272033691406
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,32,1024,32,0,0.47795519828796384
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,32,1024,16,0,0.745908784866333
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,32,1024,8,0,1.3674192428588867
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,32,512,1,0,3.4504112243652343
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,32,1024,64,0,0.3062623977661133
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,32,1024,4,0,2.6347951889038086
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,32,1024,32,0,0.5546175956726074
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,32,1024,8,0,1.4095775604248046
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,32,1024,16,0,0.8090096473693847
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,64,16,1,0,0.15469440221786498
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,64,16,2,0,0.09038559794425964
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,32,1024,64,0,0.38751840591430664
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,32,1024,4,0,2.6951087951660155
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,64,16,4,0,0.0527567982673645
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,64,16,8,0,0.03129599988460541
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,64,16,32,0,0.019233599305152893
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,32,1024,2,0,5.15145263671875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,64,16,16,0,0.020740799605846405
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,64,16,64,0,0.018313600122928618
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,64,16,1,0,0.1681023955345154
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,64,16,2,0,0.09970399737358093
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,64,16,8,0,0.04447839856147766
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,64,16,4,0,0.06955680251121521
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,64,16,16,0,0.02934719920158386
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,64,16,32,0,0.027187201380729675
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,64,16,64,0,0.025678399205207824
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,64,32,2,0,0.15944479703903197
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,64,32,1,0,0.2912368059158325
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,64,32,4,0,0.09182559847831726
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,32,1024,2,0,5.256967926025391
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,64,32,8,0,0.05666080117225647
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,64,32,16,0,0.0348688006401062
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,64,32,1,0,0.303601598739624
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,64,32,32,0,0.02353599965572357
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,64,32,64,0,0.02259040027856827
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,64,32,4,0,0.10506720542907715
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,64,32,2,0,0.179094398021698
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,64,32,8,0,0.06902400255203248
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,64,32,16,0,0.05049759745597839
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,64,32,32,0,0.034215998649597165
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,64,32,64,0,0.03304480016231537
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,64,64,2,0,0.3069888114929199
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,64,64,4,0,0.16116800308227539
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,64,64,1,0,0.5676239967346192
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,64,64,8,0,0.09860320091247558
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,64,64,16,0,0.06266080141067505
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,64,64,32,0,0.04301919937133789
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,32,1024,1,0,10.568985748291016
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,64,64,64,0,0.030129599571228027
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,64,64,4,0,0.1808176040649414
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,64,64,2,0,0.3121824026107788
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,64,64,8,0,0.11488800048828125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,64,64,1,0,0.5803167819976807
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,64,64,16,0,0.08057119846343994
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,64,64,64,0,0.04580639898777008
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,64,64,32,0,0.06048160195350647
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,64,128,4,0,0.31772000789642335
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,64,128,8,0,0.1802559971809387
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,32,1024,1,0,10.225204467773438
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,64,128,16,0,0.11369919776916504
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,64,128,2,0,0.6070576190948487
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,64,128,32,0,0.07733759880065919
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,64,128,64,0,0.05702080130577088
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,64,128,1,0,1.162827205657959
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,64,128,8,0,0.20678238868713378
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,64,128,2,0,0.6348544120788574
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,64,128,16,0,0.13769760131835937
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,64,128,32,0,0.10192960500717163
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,64,128,4,0,0.3469583988189697
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,64,128,64,0,0.08211039900779724
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,64,128,1,0,1.1815855979919434
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,64,256,8,0,0.38323040008544923
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,64,256,32,0,0.15015519857406617
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,64,256,16,0,0.22715680599212645
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,64,256,64,0,0.11036479473114014
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,64,256,4,0,0.7457871913909913
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,64,256,2,0,1.3856240272521974
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,64,256,16,0,0.27107360363006594
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,64,256,8,0,0.4265423774719238
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,64,256,32,0,0.19311039447784423
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,64,256,64,0,0.1553056001663208
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,64,256,4,0,0.7940671920776368
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,64,256,1,0,2.621918487548828
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,64,256,2,0,1.4239487648010254
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,64,512,64,0,0.25657761096954346
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,64,512,32,0,0.3785968065261841
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,64,512,16,0,0.6415008068084717
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,64,512,8,0,0.9676655769348145
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,64,256,1,0,2.6574079513549806
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,64,512,4,0,1.8746255874633788
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,64,512,8,0,1.050374412536621
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,64,512,16,0,0.7100912094116211
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,64,512,2,0,3.565856170654297
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,64,512,4,0,1.9782400131225586
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,64,512,32,0,0.4638319969177246
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,64,512,64,0,0.3367727994918823
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,128,16,1,0,0.3116911888122559
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,128,16,2,0,0.15859839916229249
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,128,16,4,0,0.09398559927940368
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,128,16,8,0,0.057417601346969604
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,128,16,16,0,0.037031999230384825
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,128,16,32,0,0.024307200312614442
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,128,16,64,0,0.022977599501609804
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,64,512,2,0,3.570150375366211
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,128,16,1,0,0.3029344081878662
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,128,16,2,0,0.17250239849090576
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,128,16,4,0,0.10589599609375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,128,16,8,0,0.07039039731025695
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,128,16,16,0,0.053198397159576416
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,128,16,32,0,0.0352400004863739
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,128,16,64,0,0.033108800649642944
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,128,32,2,0,0.2945199966430664
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,128,32,4,0,0.16535359621047974
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,64,512,1,0,6.9038642883300785
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,128,32,8,0,0.09932640194892883
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,128,32,1,0,0.5698512077331543
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,128,32,16,0,0.06330239772796631
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,128,32,32,0,0.04334239959716797
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,128,32,64,0,0.030502399802207945
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,128,32,2,0,0.3127552032470703
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,128,32,4,0,0.18083679676055908
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,128,32,8,0,0.11360160112380982
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,128,32,16,0,0.08008159995079041
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,128,32,1,0,0.591977596282959
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,128,32,32,0,0.060014402866363524
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,64,512,1,0,6.910020446777343
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,128,32,64,0,0.04586719870567322
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,128,64,4,0,0.30881760120391843
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,128,64,16,0,0.1118783950805664
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,128,64,2,0,0.5843679904937744
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,128,64,8,0,0.17847199440002443
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,128,64,64,0,0.0558351993560791
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,128,64,32,0,0.0765999972820282
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,128,64,1,0,1.122760009765625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,128,64,4,0,0.33389599323272706
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,128,64,8,0,0.20503199100494385
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,128,64,16,0,0.1371840000152588
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,128,64,2,0,0.5993599891662598
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,128,64,32,0,0.10122560262680054
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,128,64,64,0,0.08167679905891419
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,128,64,1,0,1.14552001953125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,128,128,8,0,0.34721438884735106
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,128,128,16,0,0.21079680919647217
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,128,128,4,0,0.6190815925598144
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,128,128,32,0,0.14317760467529297
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,128,128,64,0,0.10639359951019287
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,128,128,2,0,1.2027903556823731
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,128,128,8,0,0.3891184091567993
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,128,128,16,0,0.2524399995803833
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,128,128,32,0,0.1854032039642334
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,128,128,4,0,0.6634960174560547
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,128,128,64,0,0.1499408006668091
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,128,128,1,0,2.2953727722167967
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,128,128,2,0,1.2480496406555175
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,128,256,16,0,0.4356239795684814
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,128,128,1,0,2.3353567123413086
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,128,256,32,0,0.2894927978515625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,128,256,64,0,0.20572481155395508
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,128,256,8,0,0.7495200157165527
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,128,256,4,0,1.4758848190307616
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,128,256,16,0,0.5163392066955567
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,128,256,32,0,0.3609744071960449
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,128,256,8,0,0.8279744148254394
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,128,256,2,0,2.7401296615600588
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,128,256,64,0,0.28523199558258056
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,128,256,4,0,1.5566720008850097
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,256,16,2,0,0.295251202583313
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,256,16,1,0,0.5718783855438232
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,256,16,8,0,0.09982399940490723
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,256,16,4,0,0.17344000339508056
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,256,16,16,0,0.06732320189476013
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,256,16,32,0,0.043024000525474546
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,256,16,64,0,0.035652801394462585
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,128,256,2,0,2.802289581298828
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,256,16,2,0,0.3127311944961548
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,256,16,8,0,0.11642719507217407
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,256,16,4,0,0.18429280519485475
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,256,16,1,0,0.5787680149078369
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,128,256,1,0,5.214614486694336
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,256,16,16,0,0.08071680068969726
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,256,16,64,0,0.050139200687408444
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,256,16,32,0,0.06310399770736694
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,256,32,16,0,0.11285599470138549
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,256,32,4,0,0.3131664037704468
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,256,32,8,0,0.1811951994895935
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,256,32,32,0,0.07702720165252686
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,256,32,2,0,0.5856143951416015
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,256,32,64,0,0.056148797273635864
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,256,32,1,0,1.1156847953796387
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,256,32,8,0,0.20352799892425538
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,128,256,1,0,5.293507385253906
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,256,32,4,0,0.3397631883621216
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,256,32,1,0,1.143012809753418
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,256,32,16,0,0.1397487998008728
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,256,32,2,0,0.606115198135376
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,256,32,32,0,0.1017024040222168
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,256,32,64,0,0.08230559825897217
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,256,64,8,0,0.34161760807037356
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,256,64,16,0,0.20972800254821777
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,256,64,4,0,0.6086431980133057
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,256,64,32,0,0.14213600158691406
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,256,64,64,0,0.10508320331573487
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,256,64,2,0,1.1519184112548828
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,256,64,8,0,0.38303360939025877
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,256,64,16,0,0.25233280658721924
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,256,64,4,0,0.6491695880889893
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,256,64,32,0,0.18611199855804444
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,256,64,64,0,0.14867520332336426
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,256,64,1,0,2.2187679290771483
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,256,64,2,0,1.183415985107422
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,4,256,128,32,0,0.2651087999343872
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,8,256,128,16,0,0.40557279586791994
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,16,256,128,8,0,0.6715904235839844
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,256,64,1,0,2.2536479949951174
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,2,256,128,64,0,0.19674079418182372
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,32,256,128,4,0,1.217147159576416
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,8,256,128,16,0,0.4807231903076172
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,16,256,128,8,0,0.7539904117584229
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,64,256,128,2,0,2.379385566711426
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,32,256,128,4,0,1.2997488021850585
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,4,256,128,32,0,0.34413120746612547
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,2,256,128,64,0,0.2773103952407837
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,64,256,128,2,0,2.469601631164551
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,float16,128,256,128,1,0,4.578067016601563
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_context,flash_attention,float16,fp8,128,256,128,1,0,4.636579132080078
