framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,1,1,0,0.012884800136089326
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,1,2,0,0.012167999893426895
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,1,4,0,0.012083200365304947
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,1,8,0,0.012086399644613267
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,1,1,0,0.01839199960231781
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,1,2,0,0.0176816001534462
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,1,4,0,0.017791999876499175
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,1,32,0,0.012110400199890136
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,1,64,0,0.011908800154924393
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,1,16,0,0.012062399834394454
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,1,8,0,0.017564800381660462
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,1,16,0,0.01767359972000122
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,1,32,0,0.017819200456142426
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,1,64,0,0.017691199481487275
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,16,1,0,0.013964800536632538
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,16,2,0,0.013775999844074249
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,16,4,0,0.013638399541378021
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,16,8,0,0.013711999356746673
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,16,16,0,0.013415999710559845
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,16,32,0,0.012894399464130402
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,16,64,0,0.012664000689983367
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,16,1,0,0.020006400346755982
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,16,2,0,0.019622400403022766
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,16,4,0,0.019592000544071196
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,16,8,0,0.019219200313091277
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,16,16,0,0.019118399918079378
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,16,32,0,0.018680000305175783
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,16,64,0,0.018611200153827667
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,32,1,0,0.014983999729156493
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,32,2,0,0.01419519931077957
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,32,4,0,0.013846400380134582
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,32,8,0,0.013993600010871887
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,32,16,0,0.013617600500583648
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,32,32,0,0.013406400382518769
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,32,64,0,0.012859199941158295
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,32,1,0,0.020894399285316466
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,32,2,0,0.01966879963874817
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,32,4,0,0.019483199715614317
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,32,8,0,0.019444799423217772
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,32,16,0,0.01963520050048828
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,32,32,0,0.019044800102710722
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,32,64,0,0.018881599605083465
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,64,1,0,0.015411199629306793
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,64,2,0,0.014892800152301789
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,64,4,0,0.014745600521564484
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,64,8,0,0.013756799697875976
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,64,8,0,0.01974239945411682
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,64,16,0,0.013742400705814362
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,64,32,0,0.013755199313163758
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,64,64,0,0.01337919980287552
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,64,1,0,0.0218639999628067
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,64,2,0,0.020983999967575072
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,64,4,0,0.020032000541687012
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,64,16,0,0.01956000030040741
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,64,32,0,0.01966399997472763
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,64,64,0,0.019124799966812135
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,128,1,0,0.017377600073814392
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,128,2,0,0.01650400012731552
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,128,4,0,0.016415999829769136
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,128,8,0,0.01552480012178421
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,128,16,0,0.015471999347209931
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,128,32,0,0.015409600734710694
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,128,64,0,0.014697599411010741
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,128,1,0,0.02362399995326996
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,128,2,0,0.023028799891471864
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,128,4,0,0.02260479927062988
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,128,8,0,0.021478399634361267
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,128,16,0,0.02130240052938461
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,128,32,0,0.02131039947271347
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,128,64,0,0.02007199972867966
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,256,1,0,0.028574401140213014
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,256,64,0,0.017990399897098542
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,256,2,0,0.027169600129127502
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,256,2,0,0.020612800121307374
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,256,4,0,0.020121599733829498
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,256,8,0,0.019176000356674196
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,256,16,0,0.01916159987449646
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,256,32,0,0.019014400243759156
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,256,1,0,0.035569599270820616
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,256,4,0,0.026684799790382387
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,256,8,0,0.025992000102996828
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,256,16,0,0.025785601139068602
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,256,32,0,0.025119999051094057
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,256,64,0,0.023503999412059783
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,512,1,0,0.0669376015663147
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,512,2,0,0.03550879955291748
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,512,4,0,0.02738400101661682
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,512,8,0,0.03517760038375854
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,512,16,0,0.028182399272918702
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,512,32,0,0.023603199422359465
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,512,64,0,0.02245279997587204
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,512,1,0,0.07663679718971253
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,512,2,0,0.043926399946212766
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,512,4,0,0.03403359949588776
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,512,8,0,0.04329760074615478
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,512,16,0,0.03530080020427704
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,512,32,0,0.030825600028038025
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,512,64,0,0.029524800181388856
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,1024,1,0,0.17093440294265747
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,1024,2,0,0.09147040247917175
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,1024,4,0,0.04924319982528687
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,1024,8,0,0.04077439904212952
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,1024,16,0,0.04342719912528992
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,1024,32,0,0.032067200541496275
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,1024,64,0,0.027447998523712158
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,1024,1,0,0.18145279884338378
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,1024,2,0,0.10052319765090942
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,1024,4,0,0.05920799970626831
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,1024,8,0,0.047988799214363095
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,1024,16,0,0.05220800042152405
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,1024,32,0,0.04010080099105835
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,1024,64,0,0.034775999188423154
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,1536,1,0,0.3473520040512085
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,1536,2,0,0.1682144045829773
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,1536,4,0,0.09431520104408264
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,1536,8,0,0.05526880025863647
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,1536,16,0,0.05942879915237427
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,1536,32,0,0.04264000058174133
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,1536,64,0,0.034281599521636966
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,1536,1,0,0.3329328060150146
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,1536,2,0,0.18291200399398805
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,1536,4,0,0.10426239967346192
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,1536,8,0,0.06326879858970642
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,1536,16,0,0.0702127993106842
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,1536,32,0,0.05317599773406982
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,1536,64,0,0.043224000930786134
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,2048,1,0,0.5263855934143067
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,2048,2,0,0.26859359741210936
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,2048,4,0,0.15066720247268678
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,2048,8,0,0.07585920095443725
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,2048,16,0,0.06784319877624512
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,2048,32,0,0.05822399854660034
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,2048,64,0,0.04140479862689972
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,2048,1,0,0.5500304222106933
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,2048,2,0,0.2823647975921631
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,2048,4,0,0.15233759880065917
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,2048,8,0,0.08966879844665528
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,2048,16,0,0.07796159982681275
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,2048,32,0,0.06998559832572937
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,2048,64,0,0.05159199833869934
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,3072,1,0,1.0669808387756348
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,3072,2,0,0.5864048004150391
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,3072,4,0,0.28181440830230714
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,3072,8,0,0.15953119993209838
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,3072,16,0,0.10027199983596802
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,3072,32,0,0.08181759715080261
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,3072,64,0,0.060108798742294314
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,3072,1,0,1.0770367622375487
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,3072,2,0,0.5537856101989747
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,3072,4,0,0.30088798999786376
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,3072,8,0,0.1737007975578308
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,3072,16,0,0.1089967966079712
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,3072,32,0,0.09546239972114563
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,3072,64,0,0.0739359974861145
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,4096,1,0,1.839019203186035
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,4096,2,0,0.9185456275939942
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,4096,4,0,0.46535520553588866
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,4096,8,0,0.2590384006500244
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,4096,16,0,0.1332159996032715
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,4096,32,0,0.12344640493392944
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,4096,64,0,0.09013760089874268
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,4096,1,0,1.7866512298583985
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,4096,2,0,0.9112095832824707
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,6144,1,0,4.8945472717285154
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,4096,4,0,0.47744479179382326
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,4096,8,0,0.2592704057693481
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,4096,16,0,0.15132319927215576
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,4096,32,0,0.13692959547042846
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,4096,64,0,0.1045024037361145
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,6144,2,0,2.0135984420776367
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,6144,4,0,0.9867983818054199
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,6144,8,0,0.572657585144043
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,6144,16,0,0.28994719982147216
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,6144,32,0,0.1855039954185486
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,6144,64,0,0.13332799673080445
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,6144,32,0,0.20413599014282227
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,6144,1,0,4.0456687927246096
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,6144,2,0,1.9233232498168946
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,6144,4,0,0.984881591796875
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,6144,8,0,0.5177248001098633
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,6144,16,0,0.3170079946517944
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,6144,64,0,0.1491312026977539
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,8192,1,0,8.705811309814454
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,8192,2,0,3.6826831817626955
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,8192,4,0,1.7613967895507812
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,8192,8,0,0.8640303611755371
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,8192,16,0,0.45479679107666016
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,8192,32,0,0.24364640712738037
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,8192,64,0,0.24036319255828859
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,8192,1,0,7.117473602294922
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,8192,4,0,1.7216751098632812
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,8192,2,0,3.9717601776123046
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,8192,8,0,0.8800352096557618
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,8192,16,0,0.4666111946105957
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,8192,32,0,0.26559040546417234
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,8192,64,0,0.2539360046386719
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,10240,1,0,13.853944396972656
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,10240,4,0,2.646017646789551
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,10240,2,0,5.613475036621094
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,10240,8,0,1.4707216262817382
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,10240,16,0,0.7017183780670166
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,10240,32,0,0.3921823978424072
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,10240,64,0,0.29786560535430906
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,10240,32,0,0.4196591854095459
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,10240,1,0,11.347382354736329
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,10240,2,0,6.195819091796875
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,10240,4,0,2.5853824615478516
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,10240,8,0,1.3294976234436036
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,10240,16,0,0.7556672096252441
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,10240,64,0,0.318342399597168
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,12288,1,0,16.932470703125
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,12288,2,0,8.448648071289062
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,12288,4,0,4.092824172973633
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,12288,8,0,2.226158332824707
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,12288,16,0,0.9726351737976074
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,12288,32,0,0.5448095798492432
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,12288,64,0,0.3723135948181152
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,12288,1,0,15.75035400390625
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,12288,2,0,7.860984039306641
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,12288,4,0,3.999435043334961
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,12288,8,0,1.8986640930175782
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,12288,16,0,1.092846393585205
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,12288,32,0,0.5766928195953369
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,12288,64,0,0.3805039882659912
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,16384,2,0,14.608642578125
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,16384,1,0,34.733786010742186
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,16384,4,0,7.3922172546386715
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,16384,8,0,3.4726593017578127
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,16384,16,0,1.7078256607055664
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,16384,32,0,0.9923664093017578
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,16384,64,0,0.47467679977416993
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,16384,1,0,27.976345825195313
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,16384,2,0,13.944448852539063
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,16384,4,0,8.073836517333984
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,16384,8,0,3.364574432373047
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,16384,16,0,1.6910064697265625
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,16384,32,0,0.8870719909667969
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,16384,64,0,0.5396096229553222
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,32768,4,0,28.776937866210936
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,32768,64,0,1.7359535217285156
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,32768,8,0,17.044953918457033
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,32768,16,0,7.328884887695312
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,32768,2,0,59.028289794921875
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,32768,2,0,55.935675048828124
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,32768,32,0,3.417435073852539
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,32768,32,0,3.509969711303711
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,32768,1,0,120.07037353515625
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,32768,4,0,28.299322509765624
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,32768,8,0,16.28694305419922
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,32768,16,0,7.061924743652344
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,32768,64,0,1.7256576538085937
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,1,1,0,0.01273919939994812
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,32768,1,0,134.6531005859375
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,1,2,0,0.012139199674129486
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,1,4,0,0.012174399942159653
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,1,8,0,0.012095999717712403
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,1,16,0,0.011868800222873687
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,1,32,0,0.012028799951076507
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,1,64,0,0.012169600278139115
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,1,1,0,0.018267199397087097
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,1,2,0,0.01786559969186783
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,1,4,0,0.01754239946603775
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,1,8,0,0.017073599994182585
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,1,16,0,0.01746080070734024
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,1,32,0,0.01725119948387146
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,1,64,0,0.017396800220012665
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,16,1,0,0.014963200688362122
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,16,2,0,0.013967999815940857
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,16,4,0,0.01385599970817566
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,16,8,0,0.013833600282669067
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,16,16,0,0.01316159963607788
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,16,32,0,0.012887999415397644
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,16,64,0,0.012580800056457519
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,16,1,0,0.021087999641895293
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,16,2,0,0.019892799854278564
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,16,4,0,0.01979680061340332
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,16,8,0,0.019543999433517457
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,16,16,0,0.01897920072078705
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,16,32,0,0.018675200641155243
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,16,64,0,0.018508799374103546
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,32,1,0,0.015187199413776397
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,32,2,0,0.014771200716495514
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,32,4,0,0.014035199582576752
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,32,8,0,0.01398559957742691
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,32,16,0,0.013819199800491334
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,32,32,0,0.013265599310398103
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,32,64,0,0.012894399464130402
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,32,1,0,0.021675199270248413
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,32,2,0,0.020916800200939178
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,32,4,0,0.019750399887561797
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,32,8,0,0.01974879950284958
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,32,16,0,0.01995680034160614
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,32,32,0,0.01945440024137497
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,32,64,0,0.01855839937925339
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,64,1,0,0.015905599296092986
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,64,2,0,0.0156016007065773
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,64,4,0,0.015118399262428283
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,64,8,0,0.015004800260066986
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,64,16,0,0.013758400082588195
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,64,32,0,0.013944000005722046
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,64,64,0,0.013628800213336945
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,64,1,0,0.02244960069656372
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,64,2,0,0.021639999747276307
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,64,4,0,0.02117920070886612
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,64,8,0,0.020052799582481386
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,64,16,0,0.019700799882411957
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,64,32,0,0.01958879977464676
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,128,1,0,0.0253711998462677
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,64,64,0,0.019566400349140166
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,128,2,0,0.017759999632835387
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,128,4,0,0.017108799517154695
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,128,8,0,0.016663999855518342
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,128,16,0,0.016353599727153778
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,128,32,0,0.01571040004491806
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,128,64,0,0.015014399588108063
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,128,1,0,0.03369919955730438
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,256,1,0,0.053827202320098876
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,128,2,0,0.024377599358558655
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,128,4,0,0.023343999683856965
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,128,8,0,0.02268480062484741
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,128,16,0,0.022993600368499754
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,128,32,0,0.021620799601078034
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,128,64,0,0.020587199926376344
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,256,2,0,0.02938719987869263
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,256,4,0,0.021209600567817687
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,256,8,0,0.020558400452136992
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,256,16,0,0.020449599623680113
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,256,32,0,0.01992799937725067
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,256,64,0,0.01875839978456497
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,256,1,0,0.0637776017189026
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,256,2,0,0.03903520107269287
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,256,4,0,0.027822399139404298
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,256,8,0,0.0271807998418808
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,256,16,0,0.026921600103378296
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,256,32,0,0.026236799359321595
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,256,64,0,0.02462079972028732
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,512,64,0,0.02531839907169342
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,512,1,0,0.12080800533294678
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,512,2,0,0.06629760265350342
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,512,4,0,0.036873599886894225
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,512,8,0,0.0335312008857727
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,512,16,0,0.037217599153518674
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,512,32,0,0.029820799827575684
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,512,1,0,0.13196799755096436
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,512,2,0,0.07685440182685851
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,512,4,0,0.04543839991092682
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,512,8,0,0.04059999883174896
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,512,16,0,0.04586879909038544
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,512,32,0,0.03716639876365661
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,512,64,0,0.03221279978752136
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,1024,1,0,0.32534079551696776
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,1024,1,0,0.3360608100891113
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,1024,2,0,0.17114559412002564
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,1024,4,0,0.09268959760665893
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,1024,8,0,0.052590399980545044
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,1024,16,0,0.04871360063552856
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,1024,16,0,0.059303998947143555
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,1024,32,0,0.04591200053691864
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,1024,64,0,0.034780800342559814
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,1024,2,0,0.18320319652557374
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,1024,4,0,0.10486880540847779
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,1024,8,0,0.06471040248870849
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,1024,32,0,0.05722560286521912
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,1024,64,0,0.045259198546409606
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,1536,1,0,0.6342495918273926
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,1536,2,0,0.3245136022567749
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,1536,4,0,0.18182079792022704
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,1536,8,0,0.09758080244064331
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,1536,16,0,0.06628959774971008
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,1536,32,0,0.0723904013633728
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,1536,64,0,0.04766559898853302
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,1536,1,0,0.6584015846252441
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,1536,2,0,0.3382575988769531
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,1536,4,0,0.18372800350189208
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,1536,8,0,0.11281759738922119
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,2048,8,0,0.14766080379486085
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,1536,16,0,0.07832639813423156
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,1536,32,0,0.08640480041503906
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,1536,64,0,0.06048319935798645
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,2048,1,0,1.0289216041564941
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,2048,2,0,0.5601727962493896
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,2048,4,0,0.2740864038467407
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,2048,16,0,0.0853007972240448
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,2048,32,0,0.0792847990989685
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,2048,64,0,0.06480000019073487
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,2048,1,0,1.0378416061401368
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,2048,2,0,0.537724781036377
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,2048,4,0,0.2855855941772461
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,2048,8,0,0.16514240503311156
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,2048,16,0,0.09934080243110657
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,2048,32,0,0.09309920072555541
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,2048,64,0,0.08079360127449035
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,3072,1,0,2.110174369812012
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,3072,2,0,1.0715567588806152
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,3072,4,0,0.5497168064117431
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,3072,8,0,0.2886176109313965
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,3072,16,0,0.17325279712677003
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,3072,32,0,0.1146880030632019
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,3072,64,0,0.10439679622650147
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,3072,1,0,2.1461231231689455
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,3072,2,0,1.0765664100646972
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,3072,4,0,0.5590672016143798
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,3072,8,0,0.307696008682251
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,3072,16,0,0.18563679456710816
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,3072,32,0,0.13541760444641113
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,3072,64,0,0.12326719760894775
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,4096,1,0,4.348775863647461
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,4096,4,0,0.9285455703735351
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,4096,2,0,1.857111930847168
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,4096,8,0,0.4772511959075928
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,4096,16,0,0.25886240005493166
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,4096,32,0,0.14724479913711547
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,4096,64,0,0.1437343955039978
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,4096,1,0,3.6745937347412108
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,4096,2,0,2.0737871170043944
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,4096,4,0,0.9284208297729493
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,4096,8,0,0.49667038917541506
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,4096,16,0,0.278766393661499
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,4096,32,0,0.1697391986846924
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,4096,64,0,0.1662224054336548
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,6144,1,0,10.131180572509766
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,6144,2,0,4.153737640380859
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,6144,4,0,2.3438720703125
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,6144,4,0,1.999470329284668
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,6144,8,0,1.014748764038086
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,6144,16,0,0.5253439903259277
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,6144,32,0,0.30867838859558105
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,6144,64,0,0.21561920642852783
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,6144,1,0,8.337737274169921
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,6144,2,0,4.5188945770263675
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,6144,8,0,1.1274592399597168
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,6144,16,0,0.5567359924316406
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,6144,32,0,0.33696959018707273
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,6144,64,0,0.2417232036590576
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,8192,1,0,15.071066284179688
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,8192,2,0,7.583049774169922
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,8192,8,0,1.9852943420410156
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,8192,4,0,3.644643020629883
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,8192,16,0,1.0229840278625488
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,8192,32,0,0.4816256046295166
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,8192,64,0,0.27173280715942383
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,8192,2,0,7.07824478149414
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,8192,1,0,14.433853149414062
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,8192,8,0,1.9416831970214843
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,8192,4,0,3.574212646484375
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,8192,16,0,0.9136079788208008
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,8192,32,0,0.5533631801605224
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,8192,64,0,0.3057904005050659
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,10240,2,0,11.560359954833984
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,10240,4,0,5.724671936035156
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,10240,8,0,3.0642864227294924
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,10240,1,0,23.622096252441406
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,10240,16,0,1.3780207633972168
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,10240,32,0,0.7358943939208984
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,10240,64,0,0.46379680633544923
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,10240,2,0,11.055350494384765
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,10240,1,0,22.524087524414064
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,10240,4,0,5.549446487426758
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,10240,8,0,3.0609567642211912
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,10240,16,0,1.3856672286987304
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,12288,8,0,4.079824066162109
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,12288,2,0,16.663768005371093
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,12288,32,0,1.0156559944152832
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,10240,32,0,0.7747119903564453
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,10240,64,0,0.4644144058227539
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,12288,2,0,16.181996154785157
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,12288,4,0,8.499756622314454
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,12288,16,0,2.232574462890625
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,12288,64,0,0.5897808074951172
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,12288,1,0,40.849667358398435
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,12288,4,0,8.001312255859375
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,12288,1,0,31.648678588867188
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,12288,16,0,1.9674032211303711
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,12288,8,0,3.9952911376953124
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,12288,32,0,1.0488832473754883
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,12288,64,0,0.6669151782989502
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,16384,2,0,35.963232421875
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,16384,16,0,3.453473663330078
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,16384,8,0,7.2713134765625
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,16384,4,0,14.989961242675781
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,16384,8,0,6.920375823974609
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,16384,32,0,1.7585840225219727
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,16384,64,0,0.9321871757507324
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,16384,1,0,59.93751831054688
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,16384,4,0,14.345738220214844
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,16384,16,0,3.5038288116455076
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,16384,32,0,1.7611135482788085
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,16384,2,0,34.03179931640625
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,16384,64,0,0.9728927612304688
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,16384,1,0,66.72323608398438
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,32768,16,0,14.622322082519531
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,32768,32,0,8.681419372558594
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,32768,64,0,3.662083053588867
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,32768,8,0,29.750372314453124
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,32768,4,0,60.193389892578125
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,32768,16,0,14.32353515625
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,32768,32,0,7.107321929931641
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,32768,8,0,28.401470947265626
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,32768,64,0,3.6811264038085936
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,1,1,0,0.013126400113105775
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,1,2,0,0.012678399682044983
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,32768,2,0,118.9077880859375
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,1,4,0,0.012254399806261062
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,32768,4,0,67.51091918945312
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,1,8,0,0.01220960021018982
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,1,32,0,0.012166400253772736
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,1,16,0,0.012151999771595002
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,1,64,0,0.01204639971256256
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,1,1,0,0.018503999710083006
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,1,2,0,0.018334400653839112
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,32768,2,0,112.4491943359375
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,1,4,0,0.017683200538158417
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,1,8,0,0.01754239946603775
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,1,16,0,0.017566399276256563
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,1,32,0,0.017688000202178956
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,1,64,0,0.017793600261211396
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,16,1,0,0.0149167999625206
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,16,2,0,0.014983999729156493
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,16,4,0,0.014577600359916686
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,32768,1,0,239.381103515625
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,16,8,0,0.013937599956989288
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,16,16,0,0.01350879967212677
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,16,32,0,0.013219200074672699
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,16,64,0,0.012708799540996551
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,16,1,0,0.02139520049095154
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,16,2,0,0.021004800498485566
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,16,4,0,0.020318399369716644
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,16,8,0,0.019808000326156615
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,32768,1,0,265.578125
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,16,16,0,0.019414399564266206
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,16,32,0,0.019144000113010408
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,16,64,0,0.018649600446224213
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,32,1,0,0.015806399285793304
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,32,2,0,0.015270400047302245
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,32,4,0,0.015300799906253815
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,32,8,0,0.01422239989042282
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,32,16,0,0.013891200721263885
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,32,32,0,0.013625599443912506
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,32,64,0,0.013070400059223174
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,32,1,0,0.02256480008363724
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,32,2,0,0.021692800521850585
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,32,4,0,0.021374399960041045
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,32,8,0,0.020147199928760528
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,32,16,0,0.01979839950799942
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,32,32,0,0.019457599520683287
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,32,64,0,0.0188960000872612
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,64,1,0,0.024089600145816802
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,64,2,0,0.01642719954252243
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,64,4,0,0.015428799390792846
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,64,8,0,0.01563200056552887
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,64,16,0,0.014396800100803376
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,64,32,0,0.014281600713729858
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,64,64,0,0.01385280042886734
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,64,1,0,0.031220799684524535
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,64,4,0,0.02204640060663223
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,128,2,0,0.027102398872375488
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,64,2,0,0.022913600504398345
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,64,8,0,0.02184319943189621
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,64,16,0,0.02022559940814972
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,64,32,0,0.020084799826145174
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,64,64,0,0.019889600574970245
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,128,1,0,0.04780319929122925
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,128,4,0,0.018303999304771425
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,128,8,0,0.017788800597190856
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,128,16,0,0.017481599748134614
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,128,32,0,0.017113600671291352
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,128,64,0,0.01600639969110489
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,128,1,0,0.05836960077285767
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,128,2,0,0.03822399973869324
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,128,4,0,0.024750399589538574
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,128,8,0,0.024120000004768372
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,128,16,0,0.02401120066642761
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,128,32,0,0.023227199912071228
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,128,64,0,0.021593600511550903
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,256,1,0,0.09593279957771302
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,256,2,0,0.05531200170516968
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,256,4,0,0.03418239951133728
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,256,8,0,0.02264160066843033
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,256,16,0,0.0221343994140625
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,256,32,0,0.022152000665664674
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,256,64,0,0.02101760059595108
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,256,1,0,0.10592160224914551
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,256,2,0,0.06492800116539002
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,256,4,0,0.04209280014038086
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,256,8,0,0.030044800043106078
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,256,16,0,0.029238399863243104
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,256,32,0,0.029023998975753786
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,256,64,0,0.027049601078033447
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,512,1,0,0.22552800178527832
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,512,2,0,0.12177920341491699
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,512,4,0,0.06912959814071655
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,512,8,0,0.054206401109695435
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,512,16,0,0.036652800440788266
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,512,32,0,0.03949759900569916
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,512,64,0,0.03295679986476898
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,512,2,0,0.13351520299911498
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,512,1,0,0.23974719047546386
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,512,4,0,0.08105599880218506
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,512,8,0,0.06671199798583985
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,512,16,0,0.047233599424362185
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,512,32,0,0.051497602462768556
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,512,64,0,0.04274719953536987
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,1024,1,0,0.6332111835479737
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,1024,2,0,0.32892000675201416
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,1024,4,0,0.18374240398406982
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,1024,8,0,0.09860960245132447
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,1024,16,0,0.07760800123214721
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,1024,32,0,0.0549776017665863
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,1024,64,0,0.05232639908790589
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,1024,1,0,0.654966402053833
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,1024,2,0,0.34361441135406495
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,1024,4,0,0.19062720537185668
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,1024,8,0,0.11574879884719849
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,1024,16,0,0.09459840059280396
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,1024,32,0,0.06869440078735352
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,1024,64,0,0.06858239769935608
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,1536,1,0,1.3445695877075194
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,1536,2,0,0.6389552116394043
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,1536,4,0,0.32808001041412355
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,1536,8,0,0.17958240509033202
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,1536,16,0,0.12553600072860718
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,1536,32,0,0.08723520040512085
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,1536,64,0,0.08474239706993103
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,1536,1,0,1.261030387878418
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,1536,2,0,0.6712399959564209
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,1536,4,0,0.35016798973083496
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,1536,8,0,0.1995519995689392
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,1536,16,0,0.14064160585403443
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,1536,32,0,0.10807839632034302
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,1536,64,0,0.10419679880142212
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,2048,1,0,2.0760448455810545
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,2048,2,0,1.0399248123168945
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,2048,4,0,0.5689824104309082
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,2048,8,0,0.28731839656829833
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,2048,4,0,0.5522863864898682
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,2048,16,0,0.16003520488739015
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,2048,32,0,0.12846720218658447
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,2048,64,0,0.09255040287971497
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,2048,1,0,2.0618928909301757
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,2048,2,0,1.049294376373291
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,2048,8,0,0.314299201965332
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,2048,16,0,0.18474080562591552
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,2048,32,0,0.1519503951072693
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,2048,64,0,0.11719199419021606
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,3072,2,0,2.5457168579101563
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,3072,1,0,4.527342224121094
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,3072,4,0,1.0812671661376954
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,3072,8,0,0.567952013015747
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,3072,4,0,1.0948351860046386
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,3072,16,0,0.32827680110931395
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,3072,32,0,0.21069600582122802
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,3072,64,0,0.15346239805221557
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,4096,1,0,7.900433349609375
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,3072,1,0,4.359780883789062
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,3072,2,0,2.3535104751586915
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,3072,8,0,0.589851188659668
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,3072,16,0,0.34057440757751467
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,3072,32,0,0.2377471923828125
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,3072,64,0,0.18425120115280152
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,4096,2,0,3.6822879791259764
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,4096,4,0,1.8922431945800782
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,4096,16,0,0.5624095916748046
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,4096,8,0,0.9435456275939942
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,4096,32,0,0.2856559991836548
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,4096,64,0,0.23071839809417724
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,4096,1,0,7.5517021179199215
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,4096,2,0,4.275268936157227
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,6144,2,0,8.716919708251954
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,4096,4,0,1.862816047668457
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,4096,8,0,0.971827220916748
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,4096,16,0,0.5826911926269531
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,4096,32,0,0.32114400863647463
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,4096,64,0,0.2677311897277832
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,6144,1,0,17.48528289794922
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,6144,4,0,4.634455871582031
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,6144,8,0,2.0479135513305664
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,6144,16,0,1.0533712387084961
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,6144,32,0,0.5877999782562255
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,6144,64,0,0.411516809463501
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,6144,2,0,8.358433532714844
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,6144,1,0,16.50581512451172
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,6144,4,0,4.083707046508789
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,6144,8,0,2.2187616348266603
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,8192,2,0,15.238165283203125
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,6144,32,0,0.6331615924835206
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,6144,16,0,1.0769984245300293
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,6144,64,0,0.4409359931945801
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,8192,4,0,7.4639137268066404
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,8192,1,0,37.117086791992186
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,8192,16,0,2.049716758728027
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,8192,8,0,3.7524513244628905
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,8192,32,0,0.9542863845825196
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,8192,64,0,0.5296735763549805
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,8192,1,0,29.33360900878906
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,8192,4,0,7.212673950195312
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,8192,2,0,14.729342651367187
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,8192,8,0,4.19793586730957
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,8192,16,0,2.022604751586914
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,8192,32,0,1.0059247970581056
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,8192,64,0,0.590990400314331
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,10240,4,0,11.711991882324218
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,10240,2,0,23.872352600097656
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,10240,1,0,48.301483154296875
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,10240,8,0,5.782766342163086
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,10240,16,0,3.4161697387695313
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,10240,32,0,1.599179172515869
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,10240,64,0,0.8217151641845704
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,10240,4,0,11.438222503662109
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,10240,2,0,22.331828308105468
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,10240,8,0,5.6624095916748045
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,10240,16,0,2.779484748840332
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,10240,1,0,45.45321960449219
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,10240,32,0,1.6891584396362305
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,10240,64,0,0.9498895645141602
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,12288,4,0,16.6522705078125
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,12288,8,0,8.55966567993164
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,12288,2,0,34.1297119140625
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,12288,16,0,4.18516960144043
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,12288,1,0,68.66035766601563
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,12288,32,0,2.0355920791625977
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,12288,64,0,1.2740127563476562
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,12288,4,0,16.386203002929687
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,12288,2,0,31.919857788085938
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,12288,8,0,8.203272247314453
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,12288,16,0,4.160340881347656
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,12288,32,0,2.0739376068115236
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,12288,64,0,1.2028863906860352
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,12288,1,0,76.32841796875
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,16384,8,0,15.120489501953125
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,16384,64,0,1.8731327056884766
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,16384,16,0,7.508971405029297
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,16384,4,0,29.446148681640626
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,16384,32,0,3.5415679931640627
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,16384,2,0,60.88447875976563
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,16384,16,0,7.205971527099609
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,16384,8,0,14.531288146972656
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,16384,4,0,28.331182861328124
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,16384,32,0,3.716707229614258
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,16384,64,0,1.952859115600586
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,1,1,0,0.013035200536251068
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,1,2,0,0.012593600153923034
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,16384,1,0,143.628173828125
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,16384,2,0,58.15736083984375
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,1,4,0,0.01239359974861145
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,1,8,0,0.012249600142240524
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,1,16,0,0.012052799761295318
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,1,32,0,0.012166400253772736
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,1,64,0,0.012064000219106674
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,16384,1,0,134.68001708984374
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,1,1,0,0.018918399512767792
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,1,64,0,0.018382400274276733
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,1,2,0,0.018459199368953703
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,1,4,0,0.018214400112628936
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,1,8,0,0.018131199479103088
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,1,16,0,0.018016000092029572
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,1,32,0,0.0179407998919487
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,16,1,0,0.01594240069389343
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,16,2,0,0.01520639955997467
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,16,4,0,0.015150399506092071
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,16,8,0,0.014095999300479889
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,16,4,0,0.021324799954891206
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,16,16,0,0.013583999872207642
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,16,32,0,0.013276800513267517
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,16,64,0,0.013193599879741669
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,16,1,0,0.022329600155353548
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,16,2,0,0.021559999883174898
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,16,8,0,0.020286400616168977
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,16,16,0,0.019788800179958342
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,16,32,0,0.019096000492572783
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,16,64,0,0.018838399648666383
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,32,1,0,0.024716800451278685
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,32,2,0,0.016649599373340606
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,32,4,0,0.015569600462913512
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,32,8,0,0.01525759994983673
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,32,16,0,0.014528000354766845
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,32,32,0,0.014207999408245086
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,32,64,0,0.013766400516033173
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,32,1,0,0.033934399485588074
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,32,2,0,0.02282879948616028
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,32,4,0,0.0220768004655838
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,32,8,0,0.021862399578094483
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,32,16,0,0.02083519995212555
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,32,32,0,0.020043200254440306
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,32,64,0,0.01974560022354126
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,64,1,0,0.045510399341583255
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,64,2,0,0.024993599951267244
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,64,4,0,0.016897599399089813
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,64,16,0,0.016312000155448914
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,64,8,0,0.016249600052833556
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,64,32,0,0.01549919992685318
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,64,64,0,0.01485760062932968
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,64,1,0,0.05547680258750916
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,64,2,0,0.03265120089054108
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,64,4,0,0.02389120012521744
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,64,8,0,0.023104000091552734
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,64,16,0,0.022486400604248048
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,64,32,0,0.02156960070133209
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,64,64,0,0.021132799983024596
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,128,1,0,0.0838703989982605
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,128,2,0,0.04933759868144989
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,128,4,0,0.028513601422309874
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,128,8,0,0.019780799746513367
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,128,16,0,0.019441600143909454
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,128,32,0,0.01921759992837906
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,128,64,0,0.017975999414920805
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,128,1,0,0.09437599778175354
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,128,2,0,0.05961599946022034
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,128,4,0,0.03926079869270325
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,128,8,0,0.026340800523757934
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,128,16,0,0.026182401180267333
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,128,32,0,0.02619200050830841
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,128,64,0,0.02450399994850159
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,256,1,0,0.17763999700546265
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,256,2,0,0.09799360036849976
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,256,4,0,0.058631998300552365
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,256,8,0,0.03707039952278137
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,256,16,0,0.025515198707580566
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,256,32,0,0.025271999835968017
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,256,64,0,0.025065600872039795
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,256,1,0,0.18882080316543579
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,256,2,0,0.11026719808578492
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,256,4,0,0.07023040056228638
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,256,8,0,0.05009920001029968
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,256,16,0,0.03554239869117737
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,256,32,0,0.034862399101257324
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,256,64,0,0.03409439921379089
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,512,1,0,0.43656001091003416
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,512,2,0,0.2314527988433838
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,512,4,0,0.1274880051612854
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,512,8,0,0.09384480118751526
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,512,16,0,0.06305279731750488
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,512,32,0,0.04288159906864166
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,512,64,0,0.04706720113754272
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,512,1,0,0.4524672031402588
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,512,2,0,0.24754559993743896
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,512,4,0,0.1431488037109375
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,512,8,0,0.10991679430007935
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,512,16,0,0.07994239926338195
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,512,32,0,0.05704960227012634
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,512,64,0,0.0629360020160675
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,1024,1,0,1.3728480339050293
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,1024,2,0,0.6460127830505371
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,1024,4,0,0.3376703977584839
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,1024,8,0,0.18664480447769166
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,1024,16,0,0.1364240050315857
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,1024,32,0,0.09580640196800232
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,1024,64,0,0.06852959990501403
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,1024,1,0,1.2693679809570313
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,1024,2,0,0.6980815887451172
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,1024,4,0,0.36061439514160154
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,1024,8,0,0.21103360652923583
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,1024,16,0,0.15872160196304322
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,1024,32,0,0.11997120380401612
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,1024,64,0,0.0927232027053833
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,1536,1,0,2.489134407043457
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,1536,2,0,1.2596431732177735
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,1536,4,0,0.7113711833953857
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,1536,8,0,0.34574079513549805
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,1536,16,0,0.2267103910446167
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,1536,32,0,0.1533360004425049
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,1536,64,0,0.11589920520782471
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,1536,1,0,2.7622608184814452
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,1536,2,0,1.2898655891418458
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,1536,4,0,0.6706736087799072
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,1536,8,0,0.39229118824005127
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,1536,16,0,0.2573024034500122
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,1536,32,0,0.18471360206604004
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,1536,64,0,0.1461632013320923
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,2048,1,0,4.342705535888672
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,2048,2,0,2.242313575744629
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,2048,4,0,1.0576208114624024
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,2048,8,0,0.5573296070098877
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,2048,16,0,0.33216960430145265
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,2048,32,0,0.22265119552612306
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,2048,64,0,0.16348960399627685
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,2048,1,0,4.124135971069336
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,2048,2,0,2.0968816757202147
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,2048,4,0,1.1456048011779785
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,2048,8,0,0.5976208209991455
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,2048,16,0,0.3491247892379761
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,2048,32,0,0.26719839572906495
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,2048,64,0,0.20154879093170167
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,3072,1,0,9.429927825927734
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,3072,2,0,4.470444869995117
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,3072,4,0,2.170140838623047
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,3072,8,0,1.2259712219238281
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,3072,16,0,0.6098544120788574
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,3072,32,0,0.3862031936645508
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,3072,64,0,0.27906720638275145
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,3072,1,0,8.832084655761719
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,3072,2,0,4.441113662719727
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,3072,4,0,2.1930431365966796
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,4096,2,0,7.991828918457031
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,4096,1,0,19.100840759277343
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,3072,8,0,1.1648431777954102
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,3072,16,0,0.6894080162048339
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,3072,32,0,0.44594559669494627
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,3072,64,0,0.32314560413360593
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,4096,4,0,3.8740463256835938
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,4096,8,0,1.8980640411376952
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,4096,16,0,1.0013039588928223
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,4096,32,0,0.6175263881683349
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,4096,64,0,0.3980448007583618
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,4096,2,0,7.795985412597656
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,4096,1,0,17.722135925292967
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,4096,4,0,3.8882606506347654
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,4096,8,0,1.9319023132324218
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,4096,16,0,1.0522576332092286
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,4096,32,0,0.6259984016418457
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,4096,64,0,0.4855504035949707
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,6144,4,0,10.155217742919922
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,6144,2,0,17.729405212402344
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,6144,8,0,4.150033569335937
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,6144,2,0,19.706912231445312
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,6144,1,0,35.67142944335937
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,6144,16,0,2.09539680480957
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,6144,32,0,1.1303392410278321
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,6144,64,0,0.8141023635864257
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,6144,64,0,0.7151535987854004
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,6144,4,0,8.477792358398437
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,6144,8,0,4.162499237060547
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,6144,1,0,40.40806274414062
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,6144,16,0,2.4093791961669924
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,6144,32,0,1.2259599685668945
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,8192,2,0,30.80284729003906
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,8192,4,0,15.322468566894532
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,8192,8,0,7.700481414794922
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,8192,1,0,60.94630737304688
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,8192,16,0,3.818880081176758
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,8192,32,0,2.190603256225586
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,8192,64,0,1.135636806488037
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,8192,2,0,29.446920776367186
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,8192,4,0,14.76208038330078
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,8192,8,0,7.503684997558594
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,8192,1,0,59.551995849609376
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,8192,16,0,3.734142303466797
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,8192,32,0,1.9937664031982423
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,8192,64,0,1.2839808464050293
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,1,1,0,0.013689599931240082
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,1,2,0,0.012681600451469422
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,1,4,0,0.012470400333404541
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,1,8,0,0.012620800733566284
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,1,16,0,0.012276799976825714
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,1,32,0,0.012671999633312225
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,16,1,64,0,0.0124719999730587
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,1,1,0,0.018592000007629395
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,1,2,0,0.018291200697422027
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,1,4,0,0.018027199804782866
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,1,8,0,0.018172800540924072
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,1,16,0,0.018147200345993042
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,1,32,0,0.01798879951238632
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,16,1,64,0,0.01804320067167282
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,16,1,0,0.02513119876384735
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,16,2,0,0.016068799793720244
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,16,4,0,0.015515199303627015
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,16,8,0,0.015316799283027649
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,16,16,0,0.014270399510860444
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,16,32,0,0.013592000305652618
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,16,16,64,0,0.013527999818325042
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,16,1,0,0.03436320126056671
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,16,2,0,0.02274720072746277
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,16,4,0,0.022247999906539917
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,16,8,0,0.02210720032453537
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,16,16,0,0.020956799387931824
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,16,32,0,0.019900800287723543
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,16,16,64,0,0.019412800669670105
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,32,1,0,0.04594399929046631
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,32,2,0,0.024897600710391998
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,32,4,0,0.016782400012016297
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,32,8,0,0.016675199568271636
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,32,16,0,0.016302399337291718
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,32,32,0,0.015137599408626556
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,16,32,64,0,0.014444799721240997
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,32,1,0,0.055611199140548705
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,32,2,0,0.03272800147533417
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,32,4,0,0.024111999571323393
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,32,8,0,0.022950400412082673
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,32,16,0,0.022913600504398345
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,32,32,0,0.02117920070886612
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,16,32,64,0,0.02088800072669983
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,64,1,0,0.08065279722213745
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,64,2,0,0.047044798731803894
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,64,4,0,0.026051199436187743
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,64,8,0,0.01857440024614334
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,64,16,0,0.01804320067167282
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,64,32,0,0.01780800074338913
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,16,64,64,0,0.017025600373744964
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,64,1,0,0.09047840237617492
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,64,2,0,0.057011198997497556
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,64,4,0,0.034748798608779906
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,64,8,0,0.026129600405693055
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,64,16,0,0.02499520033597946
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,64,32,0,0.024675199389457704
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,16,64,64,0,0.023494400084018707
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,128,1,0,0.15267200469970704
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,128,2,0,0.08604480028152466
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,128,4,0,0.05220000147819519
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,128,8,0,0.03222399950027466
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,128,16,0,0.02284960001707077
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,128,32,0,0.022580799460411072
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,16,128,64,0,0.021886399388313292
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,128,1,0,0.16552640199661256
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,128,2,0,0.09800959825515747
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,128,4,0,0.06450719833374023
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,128,8,0,0.04466400146484375
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,128,16,0,0.03259679973125458
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,128,32,0,0.03195840120315552
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,16,128,64,0,0.03103039860725403
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,256,1,0,0.33897759914398196
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,256,2,0,0.18496160507202147
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,256,4,0,0.10261600017547608
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,256,8,0,0.06525599956512451
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,256,16,0,0.04456160068511963
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,256,32,0,0.032150399684906
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,16,256,64,0,0.03136320114135742
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,256,1,0,0.35470240116119384
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,256,2,0,0.19929759502410888
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,256,4,0,0.11867519617080688
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,256,8,0,0.0814512014389038
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,256,16,0,0.061179202795028684
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,256,32,0,0.04538240134716034
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,16,256,64,0,0.04464159905910492
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,512,1,0,0.865999984741211
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,512,2,0,0.4512432098388672
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,512,4,0,0.2406383991241455
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,512,4,0,0.2637919902801514
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,512,8,0,0.1734655976295471
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,512,16,0,0.10872800350189209
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,512,32,0,0.0773584008216858
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,16,512,64,0,0.056176000833511354
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,512,1,0,0.888212776184082
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,512,2,0,0.47861437797546386
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,512,8,0,0.19288959503173828
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,512,16,0,0.13157600164413452
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,512,32,0,0.10122560262680054
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,16,512,64,0,0.08046879768371581
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,1024,1,0,2.5591840744018555
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,1024,2,0,1.2807567596435547
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,1024,4,0,0.667409610748291
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,1024,8,0,0.3652384042739868
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,1024,16,0,0.2485680103302002
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,1024,32,0,0.16500799655914306
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,16,1024,64,0,0.12523360252380372
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,1024,1,0,2.7665359497070314
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,1024,2,0,1.3152655601501464
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,1024,4,0,0.6998960018157959
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,1536,2,0,2.828623962402344
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,1024,8,0,0.40445919036865235
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,1024,16,0,0.2890448093414307
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,1024,32,0,0.20439999103546141
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,16,1024,64,0,0.1640112042427063
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,1536,1,0,5.887838363647461
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,1536,4,0,1.282310390472412
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,1536,8,0,0.6804848194122315
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,1536,16,0,0.4498079776763916
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,1536,32,0,0.2791968107223511
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,16,1536,64,0,0.19886560440063478
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,1536,1,0,5.103163146972657
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,1536,2,0,2.5372848510742188
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,1536,4,0,1.434558391571045
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,1536,8,0,0.7341087818145752
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,1536,16,0,0.48686561584472654
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,1536,32,0,0.33690879344940183
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,16,1536,64,0,0.25642080307006837
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,2048,1,0,8.864860534667969
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,2048,2,0,4.350064086914062
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,2048,4,0,2.077840042114258
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,2048,8,0,1.2373536109924317
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,2048,16,0,0.6069568157196045
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,2048,32,0,0.4079728126525879
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,16,2048,64,0,0.28945279121398926
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,2048,1,0,8.552140808105468
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,2048,2,0,4.3050495147705075
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,2048,4,0,2.1481344223022463
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,2048,8,0,1.1591008186340332
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,3072,8,0,2.287191963195801
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,2048,16,0,0.7227007865905761
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,2048,32,0,0.4804431915283203
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,16,2048,64,0,0.3565999984741211
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,3072,1,0,21.889100646972658
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,3072,2,0,9.49709243774414
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,3072,4,0,4.671529769897461
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,3072,16,0,1.1883040428161622
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,3072,32,0,0.7400576114654541
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,16,3072,64,0,0.5122047901153565
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,3072,1,0,18.13232421875
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,3072,2,0,8.994767761230468
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,3072,4,0,5.1810768127441404
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,3072,8,0,2.331488037109375
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,3072,16,0,1.288708782196045
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,3072,32,0,0.8407135963439941
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,16,3072,64,0,0.588372802734375
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,4096,4,0,8.097022247314452
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,4096,2,0,16.31965026855469
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,4096,1,0,39.23576354980469
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,4096,8,0,4.574663925170898
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,4096,16,0,2.01070556640625
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,4096,32,0,1.1085760116577148
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,16,4096,64,0,0.7235104084014893
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,4096,2,0,15.43563232421875
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,4096,4,0,7.861154937744141
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,4096,1,0,31.225738525390625
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,4096,8,0,4.600276947021484
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,4096,16,0,2.298244857788086
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,4096,32,0,1.2341327667236328
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,16,4096,64,0,0.8664112091064453
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,1,1,0,0.014347200095653535
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,1,2,0,0.013395200669765472
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,1,4,0,0.013038399815559387
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,1,8,0,0.012902399897575379
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,1,16,0,0.012950399518013
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,1,32,0,0.013145600259304047
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,32,1,64,0,0.012892800569534301
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,1,1,0,0.020342400670051573
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,1,2,0,0.018932799994945525
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,1,4,0,0.018777599930763243
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,1,8,0,0.01873279958963394
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,1,16,0,0.018638400733470915
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,16,16,0,0.01641920059919357
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,1,32,0,0.018783999979496
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,32,1,64,0,0.018768000602722167
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,16,1,0,0.04590719938278198
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,16,2,0,0.026787200570106508
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,16,4,0,0.017608000338077544
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,16,8,0,0.016897599399089813
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,16,32,0,0.015279999375343323
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,32,16,64,0,0.015097600221633912
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,16,1,0,0.05583680272102356
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,16,2,0,0.03562079966068268
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,16,4,0,0.024345600605010988
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,16,8,0,0.023892800509929656
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,16,16,0,0.022910399734973906
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,16,32,0,0.021704000234603883
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,32,16,64,0,0.021532799303531646
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,32,32,0,0.018156799674034118
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,32,1,0,0.08042399883270264
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,32,2,0,0.04738079905509949
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,32,4,0,0.02725600004196167
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,32,16,0,0.018532800674438476
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,32,8,0,0.01894560009241104
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,32,32,64,0,0.016811199486255646
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,32,1,0,0.09133920073509216
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,32,2,0,0.05780159831047058
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,32,4,0,0.034518399834632875
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,32,8,0,0.02556000053882599
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,32,16,0,0.025241601467132568
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,32,32,0,0.02483839988708496
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,32,32,64,0,0.023783999681472778
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,64,1,0,0.14470080137252808
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,64,2,0,0.08401920199394226
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,64,4,0,0.05035679936408997
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,64,8,0,0.029836800694465638
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,64,16,0,0.021913599967956544
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,64,32,0,0.021779200434684752
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,32,64,64,0,0.021587200462818146
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,64,1,0,0.15661120414733887
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,64,2,0,0.09567360281944275
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,64,4,0,0.062427198886871337
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,64,8,0,0.04175359904766083
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,64,16,0,0.03143840134143829
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,64,32,0,0.0312032014131546
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,32,64,64,0,0.031011199951171874
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,128,1,0,0.2926543951034546
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,128,2,0,0.15726239681243898
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,128,4,0,0.09238719940185547
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,128,8,0,0.05899999737739563
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,128,16,0,0.03984160125255585
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,128,32,0,0.029497599601745604
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,128,32,0,0.04294399917125702
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,32,128,64,0,0.02917119860649109
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,128,1,0,0.30887041091918943
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,128,2,0,0.1733440041542053
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,128,4,0,0.1087183952331543
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,128,8,0,0.074755197763443
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,128,16,0,0.05655519962310791
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,32,128,64,0,0.0419295996427536
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,256,1,0,0.6652095794677735
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,256,2,0,0.35849120616912844
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,256,4,0,0.1928655982017517
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,256,8,0,0.11636799573898315
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,256,16,0,0.0790224015712738
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,256,32,0,0.058766400814056395
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,32,256,64,0,0.04515039920806885
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,256,1,0,0.6887648105621338
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,256,2,0,0.37920958995819093
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,256,4,0,0.21662399768829346
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,256,8,0,0.13990240097045897
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,256,16,0,0.10264480113983154
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,256,32,0,0.08264960050582885
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,32,256,64,0,0.06648480296134948
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,512,1,0,1.7033775329589844
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,512,2,0,0.8966176033020019
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,512,4,0,0.4697984218597412
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,512,8,0,0.3315232038497925
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,512,16,0,0.2016688108444214
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,512,32,0,0.13627200126647948
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,32,512,64,0,0.10521279573440552
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,512,1,0,1.7422672271728517
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,512,2,0,0.9338640213012696
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,512,4,0,0.5086559772491455
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,512,8,0,0.36106719970703127
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,512,16,0,0.2410736083984375
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,512,32,0,0.17676639556884766
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,32,512,64,0,0.14434720277786256
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,1024,1,0,5.209011077880859
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,1024,2,0,2.6059951782226562
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,1024,4,0,1.3075471878051759
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,1024,8,0,0.7049903869628906
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,1024,16,0,0.47341279983520507
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,1024,32,0,0.31495840549468995
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,32,1024,64,0,0.2241760015487671
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,1024,1,0,5.152455902099609
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,1024,2,0,2.6474735260009767
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,1024,4,0,1.3812191963195801
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,1024,8,0,0.7792399883270263
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,1024,16,0,0.5476672172546386
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,1024,32,0,0.3794464111328125
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,1536,32,0,0.5172319889068604
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,32,1024,64,0,0.3001983880996704
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,32,1536,64,0,0.35970239639282225
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,1536,1,0,10.266089630126952
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,1536,2,0,5.924316787719727
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,1536,2,0,5.246628952026367
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,1536,1,0,12.22848129272461
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,1536,16,0,1.0038432121276855
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,1536,32,0,0.6273519992828369
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,1536,4,0,2.561982345581055
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,1536,8,0,1.3466927528381347
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,1536,16,0,0.8376576423645019
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,2048,4,0,4.496430587768555
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,1536,4,0,2.649398422241211
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,1536,8,0,1.4501328468322754
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,32,1536,64,0,0.4781472206115723
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,2048,1,0,20.809123229980468
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,2048,2,0,8.979036712646485
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,2048,8,0,2.217880058288574
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,2048,16,0,1.199407958984375
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,2048,32,0,0.7822015762329102
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,32,2048,64,0,0.5190703868865967
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,2048,1,0,17.083387756347655
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,2048,2,0,9.97831039428711
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,2048,4,0,5.031121444702149
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,2048,8,0,2.3238576889038085
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,2048,16,0,1.327622413635254
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,2048,32,0,0.9157391548156738
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,32,2048,64,0,0.6645503997802734
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,1,1,0,0.015422399342060088
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,1,2,0,0.01422239989042282
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,1,4,0,0.014091199636459351
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,1,8,0,0.014068800210952758
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,1,16,0,0.014095999300479889
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,1,32,0,0.01371999979019165
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,64,1,64,0,0.014019200205802917
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,1,1,0,0.021347199380397797
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,1,2,0,0.020153599977493285
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,1,4,0,0.019652800261974336
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,1,8,0,0.019812799990177155
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,1,16,0,0.019569599628448488
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,1,32,0,0.019729599356651306
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,64,1,64,0,0.01969760060310364
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,16,1,0,0.08294079899787903
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,16,1,0,0.09096480011940003
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,16,2,0,0.04864799976348877
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,16,4,0,0.02924799919128418
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,16,8,0,0.019766399264335634
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,16,16,0,0.018492799997329713
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,16,32,0,0.0177839994430542
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,64,16,64,0,0.017587199807167053
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,16,2,0,0.05802080035209656
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,16,4,0,0.03669439852237701
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,16,8,0,0.026555201411247252
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,16,16,0,0.025947201251983642
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,16,32,0,0.024753600358963013
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,64,16,64,0,0.024289600551128387
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,32,1,0,0.14621599912643432
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,32,1,0,0.15851839780807495
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,32,2,0,0.0864736020565033
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,32,4,0,0.05112640261650085
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,32,8,0,0.030788800120353697
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,32,16,0,0.022771200537681578
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,32,32,0,0.0218639999628067
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,64,32,64,0,0.02101760059595108
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,32,2,0,0.0965503990650177
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,32,4,0,0.06415039896965027
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,32,8,0,0.04279040098190308
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,32,16,0,0.032455998659133914
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,32,32,0,0.031095999479293823
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,64,32,64,0,0.030110400915145875
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,64,1,0,0.2731839895248413
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,64,2,0,0.15171680450439454
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,64,4,0,0.09077280163764953
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,64,8,0,0.05798239707946777
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,64,16,0,0.038764798641204835
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,64,32,0,0.029054400324821473
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,64,64,64,0,0.028398400545120238
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,64,1,0,0.2921663999557495
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,64,2,0,0.1672767996788025
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,64,4,0,0.10663360357284546
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,64,8,0,0.0731119990348816
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,64,16,0,0.054788798093795776
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,64,32,0,0.042561599612236024
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,64,64,64,0,0.041921600699424744
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,128,1,0,0.5688896179199219
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,128,2,0,0.30075199604034425
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,128,4,0,0.16960639953613282
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,128,8,0,0.1054800033569336
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,128,16,0,0.0728767991065979
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,128,32,0,0.05464479923248291
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,64,128,64,0,0.043403199315071105
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,128,1,0,0.592633581161499
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,128,2,0,0.3211632013320923
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,128,4,0,0.19372479915618895
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,128,8,0,0.12929600477218628
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,128,16,0,0.09637920260429382
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,128,32,0,0.07808640003204345
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,64,128,64,0,0.06447200179100036
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,256,1,0,1.3283519744873047
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,256,2,0,0.7014815807342529
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,256,4,0,0.3700000047683716
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,256,4,0,0.4098991870880127
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,256,8,0,0.2211008071899414
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,256,16,0,0.14352480173110962
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,256,32,0,0.10631040334701539
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,64,256,64,0,0.08217920064926147
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,512,1,0,3.3879936218261717
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,256,1,0,1.3706048011779786
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,256,2,0,0.7449888229370117
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,256,8,0,0.2595632076263428
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,256,16,0,0.18362879753112793
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,256,32,0,0.14679039716720582
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,64,256,64,0,0.1195248007774353
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,512,2,0,1.9260160446166992
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,512,4,0,0.9248224258422851
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,512,8,0,0.6391695976257324
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,512,16,0,0.37807040214538573
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,512,32,0,0.2525536060333252
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,64,512,64,0,0.18909759521484376
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,512,1,0,3.4874000549316406
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,512,2,0,1.8510128021240235
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,512,4,0,1.0339776039123536
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,512,8,0,0.6983359813690185
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,512,16,0,0.45470237731933594
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,512,32,0,0.3285408020019531
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,64,512,64,0,0.2664783954620361
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,1024,1,0,10.620800018310547
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,1024,2,0,5.352923202514648
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,1024,4,0,2.659793663024902
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,1024,4,0,2.759115219116211
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,1024,2,0,5.276628875732422
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,1024,8,0,1.567246437072754
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,1024,16,0,0.9152064323425293
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,1024,32,0,0.5982848167419433
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,64,1024,64,0,0.4124351978302002
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,1024,1,0,10.338516998291016
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,1024,8,0,1.5329504013061523
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,1024,16,0,1.0671983718872071
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,1024,32,0,0.7247568130493164
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,64,1024,64,0,0.5634655952453613
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,1,1,0,0.017310400307178498
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,1,2,0,0.015798400342464446
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,1,4,0,0.01565600037574768
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,1,8,0,0.015508800745010376
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,1,16,0,0.015441599488258361
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,1,32,0,0.015857599675655365
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,128,1,64,0,0.015854400396347047
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,1,1,0,0.02273920029401779
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,1,2,0,0.022096000611782074
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,1,4,0,0.021524800360202788
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,1,8,0,0.02144159972667694
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,1,16,0,0.02147520035505295
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,1,32,0,0.021505600214004515
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,128,1,64,0,0.021297599375247955
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,16,1,0,0.14808160066604614
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,16,2,0,0.08533440232276916
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,16,4,0,0.052127999067306516
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,16,8,0,0.03346239924430847
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,16,16,0,0.023494400084018707
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,16,32,0,0.022152000665664674
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,128,16,64,0,0.02176479995250702
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,16,1,0,0.1582800030708313
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,16,2,0,0.09778239727020263
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,16,4,0,0.06387199759483338
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,16,8,0,0.04427039921283722
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,32,4,0,0.0919871985912323
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,16,16,0,0.033502399921417236
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,16,32,0,0.031673601269721983
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,128,16,64,0,0.031839999556541446
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,32,1,0,0.27585439682006835
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,32,2,0,0.15504319667816163
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,32,8,0,0.05833280086517334
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,32,16,0,0.03938719928264618
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,32,32,0,0.030239999294281006
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,128,32,64,0,0.028171199560165405
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,32,1,0,0.29524319171905516
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,32,2,0,0.16783360242843628
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,32,4,0,0.10666400194168091
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,32,8,0,0.07446560263633728
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,32,16,0,0.05606080293655395
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,32,32,0,0.04329760074615478
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,128,32,64,0,0.041580799221992495
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,64,1,0,0.5557744026184082
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,64,2,0,0.30264959335327146
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,64,4,0,0.1671056032180786
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,64,8,0,0.10373439788818359
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,64,16,0,0.070796799659729
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,64,32,0,0.05302240252494812
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,128,64,64,0,0.043884798884391785
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,64,1,0,0.557047986984253
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,64,2,0,0.31171839237213134
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,64,4,0,0.1908768057823181
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,64,8,0,0.1272655963897705
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,64,16,0,0.09435679912567138
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,64,32,0,0.0771232008934021
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,128,64,64,0,0.06652960181236267
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,128,1,0,1.1250160217285157
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,128,2,0,0.5827680110931397
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,128,4,0,0.32808480262756345
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,128,8,0,0.19868320226669312
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,128,16,0,0.13409440517425536
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,128,32,0,0.1004080057144165
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,128,128,64,0,0.0781503975391388
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,128,2,0,0.6215343952178956
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,128,1,0,1.1697823524475097
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,128,4,0,0.3655904054641724
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,128,8,0,0.23862080574035643
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,128,16,0,0.17494879961013793
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,128,32,0,0.1407855987548828
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,128,128,64,0,0.11574399471282959
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,256,1,0,2.6451648712158202
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,256,2,0,1.391881561279297
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,256,4,0,0.7461631774902344
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,256,8,0,0.42259521484375
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,256,16,0,0.2744672060012817
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,256,32,0,0.19736479520797728
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,128,256,64,0,0.1498463988304138
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,256,1,0,2.6973360061645506
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,256,2,0,1.4724335670471191
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,256,4,0,0.7971968173980712
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,256,8,0,0.5013967990875244
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,256,16,0,0.3482095956802368
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,256,32,0,0.27424800395965576
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,512,16,0,0.7578864097595215
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,128,256,64,0,0.22581920623779297
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,512,1,0,6.961497497558594
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,512,2,0,3.584084701538086
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,512,4,0,1.8515663146972656
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,512,8,0,1.2253680229187012
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,512,32,0,0.47971677780151367
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,128,512,64,0,0.35984001159667967
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,128,512,64,0,0.501801586151123
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,512,2,0,3.699079895019531
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,512,1,0,7.957977294921875
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,512,4,0,1.9738319396972657
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,512,8,0,1.3729184150695801
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,512,16,0,0.880833625793457
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,512,32,0,0.625167989730835
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,256,1,1,0,0.027033600211143493
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,256,1,2,0,0.023343999683856965
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,256,1,4,0,0.02304159998893738
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,256,1,8,0,0.02234400063753128
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,256,1,16,0,0.022368000447750093
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,256,1,32,0,0.02212799936532974
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,256,1,64,0,0.02248319983482361
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,256,1,1,0,0.0338591992855072
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,256,1,2,0,0.029393601417541503
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,256,1,4,0,0.02884480059146881
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,256,1,8,0,0.02864319980144501
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,256,1,16,0,0.02826879918575287
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,256,1,32,0,0.028385600447654723
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,256,1,64,0,0.028347200155258177
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,256,16,1,0,0.2828880071640015
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,256,16,2,0,0.153603196144104
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,256,16,4,0,0.09275519847869873
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,256,16,8,0,0.0599232017993927
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,256,16,16,0,0.04111039936542511
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,256,16,32,0,0.035627201199531555
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,256,16,64,0,0.03516960144042969
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,256,16,1,0,0.29173119068145753
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,256,16,2,0,0.16810400485992433
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,256,16,4,0,0.10771520137786865
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,256,16,8,0,0.07531200051307678
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,256,16,16,0,0.05554559826850891
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,256,16,32,0,0.049327999353408813
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,256,16,64,0,0.04870080053806305
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,256,32,1,0,0.5321856021881104
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,256,32,2,0,0.2861520051956177
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,256,32,4,0,0.16946879625320435
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,256,32,8,0,0.10597440004348754
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,256,32,16,0,0.07210080027580261
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,256,32,32,0,0.05358880162239075
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,256,32,64,0,0.04848639965057373
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,256,32,1,0,0.5553808212280273
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,256,32,2,0,0.3099695920944214
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,256,32,4,0,0.19036959409713744
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,256,32,8,0,0.12900160551071166
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,256,32,16,0,0.0959007978439331
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,256,32,32,0,0.0773248016834259
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,256,32,64,0,0.07228800058364868
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,256,64,1,0,1.080350399017334
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,256,64,2,0,0.5636767864227294
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,256,64,4,0,0.3186079978942871
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,256,64,8,0,0.1952239990234375
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,256,64,16,0,0.13536159992218016
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,256,64,32,0,0.09993759989738464
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,256,64,32,0,0.14004960060119628
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,256,64,64,0,0.080103999376297
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,256,64,64,0,0.11952799558639526
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,256,64,1,0,1.104580783843994
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,256,64,2,0,0.6127295970916748
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,256,64,4,0,0.3613248109817505
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,256,64,8,0,0.23599040508270264
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,256,64,16,0,0.17319200038909913
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,256,128,1,0,2.25130558013916
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,256,128,2,0,1.1561424255371093
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,256,128,4,0,0.6341887950897217
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,256,128,8,0,0.37965760231018064
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,256,128,16,0,0.2521888017654419
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,256,128,32,0,0.18758560419082643
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,256,128,64,0,0.14592479467391967
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,256,128,1,0,2.3143264770507814
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,256,128,2,0,1.231886386871338
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,256,128,4,0,0.7122608184814453
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,256,128,8,0,0.4546624183654785
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,256,128,16,0,0.3265872001647949
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,256,128,32,0,0.2642384052276611
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,256,128,64,0,0.22156000137329102
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,256,256,2,0,2.7733695983886717
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,256,256,1,0,5.231631851196289
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,256,256,4,0,1.4413375854492188
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,256,256,8,0,0.8349488258361817
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,256,256,16,0,0.5257311820983886
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,256,256,32,0,0.37627038955688474
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,1,256,256,64,0,0.28373119831085203
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,256,256,2,0,2.9079599380493164
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,256,256,1,0,5.786363220214843
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,1,1,0,0.01308320015668869
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,256,256,4,0,1.5739567756652832
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,256,256,8,0,0.9769472122192383
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,256,256,16,0,0.674675178527832
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,256,256,32,0,0.5223152160644531
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,256,256,64,0,0.4295792102813721
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,1,2,0,0.012611199915409089
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,1,4,0,0.012387199699878693
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,1,8,0,0.012033600360155106
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,1,16,0,0.012214399874210358
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,1,32,0,0.011975999921560287
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,1,64,0,0.012076800316572189
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,1,1,0,0.018751999735832213
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,1,2,0,0.018361599743366243
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,1,4,0,0.018036800622940063
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,1,8,0,0.017470400035381316
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,1,16,0,0.017684799432754517
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,1,32,0,0.01781280040740967
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,1,64,0,0.017609600722789765
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,16,1,0,0.015065599977970124
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,16,2,0,0.014047999680042268
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,16,4,0,0.013860799372196198
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,16,8,0,0.013833600282669067
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,16,16,0,0.013540799915790557
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,16,32,0,0.013356800377368926
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,16,64,0,0.012937599420547485
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,16,1,0,0.021355199813842773
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,16,2,0,0.01950239986181259
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,16,4,0,0.019675199687480927
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,16,8,0,0.019545599818229675
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,16,16,0,0.019499200582504272
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,16,32,0,0.01897599995136261
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,16,64,0,0.01871040016412735
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,32,1,0,0.015080000460147857
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,32,2,0,0.015145599842071533
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,32,4,0,0.01480640023946762
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,32,8,0,0.01393280029296875
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,32,16,0,0.013748799264431
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,32,32,0,0.013607999682426453
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,32,64,0,0.013444800674915314
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,32,1,0,0.021510399878025055
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,32,2,0,0.02088800072669983
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,32,4,0,0.02009759992361069
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,64,2,0,0.015363200008869171
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,32,8,0,0.019710400700569154
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,32,16,0,0.019500799477100372
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,32,32,0,0.01961120069026947
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,32,64,0,0.019105599820613862
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,64,1,0,0.015782399475574492
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,64,4,0,0.014884799718856812
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,64,8,0,0.014312000572681427
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,64,16,0,0.013942399621009826
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,64,32,0,0.013910399377346038
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,64,64,0,0.013633599877357483
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,64,1,0,0.022460800409317017
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,64,2,0,0.021881599724292756
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,64,4,0,0.021219199895858763
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,64,8,0,0.019782400131225585
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,64,16,0,0.019753600656986236
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,64,32,0,0.019732800126075745
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,64,64,0,0.01953279972076416
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,128,1,0,0.026414400339126586
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,128,2,0,0.017343999445438386
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,128,4,0,0.016846400499343873
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,128,8,0,0.016463999450206757
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,128,16,0,0.015585599839687348
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,128,32,0,0.015502400696277618
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,128,32,0,0.02162719964981079
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,128,64,0,0.01536639928817749
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,128,1,0,0.035734400153160095
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,128,2,0,0.023558400571346283
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,128,4,0,0.023078399896621703
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,128,8,0,0.022676800191402436
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,128,16,0,0.02202879935503006
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,128,64,0,0.021055999398231506
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,256,1,0,0.05437279939651489
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,256,64,0,0.01895360052585602
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,256,2,0,0.029731199145317078
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,256,4,0,0.020638400316238405
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,256,8,0,0.019860799610614776
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,256,16,0,0.01935359984636307
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,256,32,0,0.019462400674819948
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,256,2,0,0.03549120128154755
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,256,1,0,0.06390560269355774
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,256,4,0,0.027055999636650084
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,256,8,0,0.026723200082778932
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,256,16,0,0.02534559965133667
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,256,32,0,0.02504960000514984
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,256,64,0,0.025089600682258607
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,512,1,0,0.12058080434799194
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,512,2,0,0.06670560240745545
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,512,4,0,0.03561919927597046
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,512,8,0,0.02707839906215668
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,512,16,0,0.035703998804092404
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,512,32,0,0.028300800919532777
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,512,64,0,0.023824000358581544
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,512,1,0,0.13066719770431517
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,512,2,0,0.07676159739494323
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,512,4,0,0.04311679899692535
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,512,8,0,0.034272000193595886
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,512,16,0,0.04459680020809174
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,512,32,0,0.03542239964008331
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,512,64,0,0.030963200330734252
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,1024,32,0,0.04318560063838959
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,1024,1,0,0.3241935968399048
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,1024,2,0,0.17072160243988038
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,1024,4,0,0.09151840209960938
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,1024,8,0,0.04861760139465332
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,1024,16,0,0.04134399890899658
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,1024,64,0,0.03217119872570038
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,1024,1,0,0.340665602684021
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,1024,2,0,0.183404803276062
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,1024,4,0,0.10171359777450562
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,1024,8,0,0.058267199993133546
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,1024,16,0,0.047886401414871216
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,1024,32,0,0.05114719867706299
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,1024,64,0,0.039843198657035825
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,1536,1,0,0.6261807918548584
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,1536,2,0,0.33781440258026124
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,1536,4,0,0.17889759540557862
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,1536,8,0,0.09401599764823913
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,1536,16,0,0.054388797283172606
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,1536,32,0,0.05894240140914917
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,1536,64,0,0.0423007994890213
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,1536,1,0,0.6378943920135498
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,1536,2,0,0.33385279178619387
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,1536,4,0,0.18092960119247437
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,1536,8,0,0.10569920539855956
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,1536,16,0,0.06331200003623963
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,1536,32,0,0.07086880207061767
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,1536,64,0,0.05348960161209106
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,2048,1,0,1.0017775535583495
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,2048,2,0,0.528110408782959
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,2048,4,0,0.2701215982437134
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,2048,8,0,0.14642879962921143
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,2048,16,0,0.07864639759063721
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,2048,32,0,0.068259197473526
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,2048,64,0,0.05816799998283386
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,2048,32,0,0.07796800136566162
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,2048,1,0,1.0118831634521483
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,2048,2,0,0.5331727981567382
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,2048,4,0,0.2826031923294067
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,2048,8,0,0.15420639514923096
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,2048,16,0,0.08882880210876465
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,2048,64,0,0.06932960152626037
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,3072,1,0,2.1212335586547852
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,3072,2,0,1.204982376098633
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,3072,4,0,0.547657585144043
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,3072,8,0,0.2786207914352417
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,3072,16,0,0.16064640283584594
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,3072,32,0,0.09793279767036438
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,3072,64,0,0.082259202003479
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,3072,1,0,2.0667776107788085
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,3072,2,0,1.0664079666137696
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,4096,1,0,4.125476837158203
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,3072,4,0,0.5928671836853028
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,3072,8,0,0.2905424118041992
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,3072,16,0,0.17296799421310424
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,3072,32,0,0.10924160480499268
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,3072,64,0,0.09609119892120362
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,4096,2,0,1.8089584350585937
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,4096,4,0,0.9161215782165527
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,4096,8,0,0.5053647994995117
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,4096,16,0,0.2692032098770142
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,4096,32,0,0.13332480192184448
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,4096,64,0,0.12370560169219971
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,4096,1,0,3.7554222106933595
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,4096,2,0,1.7999664306640626
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,4096,4,0,0.9133312225341796
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,6144,2,0,4.189699172973633
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,4096,8,0,0.47384161949157716
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,4096,16,0,0.2659791946411133
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,4096,32,0,0.1519871950149536
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,4096,64,0,0.13725119829177856
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,6144,1,0,8.614288330078125
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,6144,4,0,2.0380640029907227
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,6144,8,0,0.9830880165100098
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,6144,16,0,0.5107999801635742
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,6144,32,0,0.2866208076477051
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,6144,64,0,0.19036799669265747
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,6144,1,0,9.615299224853516
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,6144,2,0,3.967697525024414
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,6144,4,0,1.9569440841674806
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,6144,8,0,0.9900032043457031
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,6144,16,0,0.5170063972473145
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,6144,32,0,0.30860960483551025
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,6144,64,0,0.19979679584503174
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,8192,2,0,8.852875518798829
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,8192,1,0,17.772915649414063
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,8192,4,0,3.685097503662109
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,8192,8,0,1.734979248046875
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,8192,16,0,0.8735024452209472
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,8192,32,0,0.44907522201538086
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,8192,64,0,0.24519360065460205
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,8192,1,0,14.657884216308593
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,8192,2,0,8.361978912353516
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,8192,4,0,3.976873779296875
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,8192,8,0,1.6806911468505858
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,10240,8,0,3.011435127258301
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,8192,16,0,0.8741791725158692
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,8192,32,0,0.4691808223724365
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,8192,64,0,0.26410560607910155
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,10240,2,0,11.751537322998047
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,10240,1,0,23.673486328125
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,10240,4,0,6.852073669433594
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,10240,16,0,1.3434576034545898
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,10240,32,0,0.7577792167663574
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,10240,64,0,0.39236159324645997
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,10240,1,0,22.40167236328125
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,10240,64,0,0.42954401969909667
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,10240,2,0,11.030267333984375
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,10240,4,0,5.516505432128906
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,10240,8,0,2.6867040634155273
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,10240,16,0,1.496121597290039
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,10240,32,0,0.7130271911621093
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,12288,2,0,16.953208923339844
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,12288,4,0,9.79760513305664
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,12288,8,0,3.7644607543945314
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,12288,1,0,33.51523132324219
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,12288,16,0,1.9123264312744142
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,12288,32,0,0.9754159927368165
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,12288,64,0,0.6000463962554932
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,12288,2,0,15.996487426757813
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,12288,1,0,32.078515625
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,12288,8,0,3.8827136993408202
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,12288,4,0,9.160179138183594
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,12288,16,0,1.8903072357177735
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,16384,8,0,7.229606628417969
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,16384,16,0,3.999204635620117
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,12288,32,0,0.9734127998352051
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,12288,64,0,0.5772687911987304
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,16384,2,0,29.87212219238281
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,16384,4,0,14.96580810546875
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,16384,32,0,1.7101791381835938
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,16384,64,0,0.8648719787597656
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,16384,1,0,71.40850219726562
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,16384,8,0,7.0620674133300785
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,16384,4,0,14.303695678710938
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,16384,16,0,3.449964904785156
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,16384,2,0,28.478146362304688
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,16384,32,0,1.6670896530151367
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,16384,64,0,0.8839776039123535
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,16384,1,0,66.48209228515626
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,32768,16,0,14.785116577148438
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,32768,32,0,7.2707069396972654
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,32768,64,0,3.458446502685547
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,32768,8,0,29.64429931640625
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,32768,4,0,60.04627685546875
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,32768,2,0,120.20665283203125
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,32768,32,0,7.0488334655761715
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,32768,16,0,14.16622314453125
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,32768,64,0,3.3609375
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,32768,8,0,27.61882629394531
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,1,1,0,0.013313600420951843
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,32768,4,0,56.31193237304687
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,1,2,0,0.012827199697494508
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,1,4,0,0.01233920007944107
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,1,8,0,0.012033600360155106
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,1,16,0,0.011998400092124939
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,32768,2,0,114.31712646484375
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,1,32,0,0.01194079965353012
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,1,64,0,0.012035199999809265
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,1,1,0,0.01889439970254898
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,1,2,0,0.018278400599956512
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,1,4,0,0.017502400279045104
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,1,8,0,0.017444799840450286
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,32768,1,0,280.72216796875
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,1,16,0,0.017390400171279907
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,1,32,0,0.017166399955749513
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,1,64,0,0.01743520051240921
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,16,1,0,0.01496800035238266
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,32768,1,0,265.9548095703125
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,16,2,0,0.015199999511241912
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,16,4,0,0.014035199582576752
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,16,8,0,0.013910399377346038
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,16,16,0,0.013840000331401824
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,16,32,0,0.013279999792575835
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,16,64,0,0.012873600423336028
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,16,1,0,0.02168480008840561
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,16,2,0,0.020904000103473663
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,16,4,0,0.02017440050840378
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,16,8,0,0.019735999405384064
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,16,16,0,0.01958879977464676
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,16,32,0,0.019115200638771056
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,16,64,0,0.01873439997434616
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,32,1,0,0.015915200114250183
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,32,2,0,0.01507679969072342
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,32,4,0,0.014956800639629364
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,32,8,0,0.014291200041770934
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,32,16,0,0.014059199392795563
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,32,32,0,0.013775999844074249
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,32,64,0,0.013340799510478974
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,32,1,0,0.02249760031700134
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,32,2,0,0.021745599806308746
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,32,4,0,0.020955200493335723
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,32,8,0,0.019750399887561797
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,32,16,0,0.019815999269485473
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,32,32,0,0.019841599464416503
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,32,64,0,0.018985599279403687
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,64,1,0,0.023865599930286408
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,64,2,0,0.016150400042533875
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,64,4,0,0.015535999834537507
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,64,8,0,0.014851200580596923
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,64,16,0,0.014139199256896972
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,64,32,0,0.01383039951324463
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,64,64,0,0.013996799290180207
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,64,1,0,0.031172800064086913
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,64,2,0,0.02242079973220825
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,64,4,0,0.021272000670433045
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,64,8,0,0.02144639939069748
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,64,16,0,0.020084799826145174
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,64,32,0,0.019843199849128725
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,64,64,0,0.019704000651836397
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,128,1,0,0.04773440062999725
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,128,2,0,0.024931199848651886
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,128,4,0,0.0181551992893219
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,128,8,0,0.017214399576187134
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,128,16,0,0.016940799355506898
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,128,32,0,0.015910400450229643
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,128,64,0,0.01579360067844391
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,128,64,0,0.02155359983444214
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,128,1,0,0.05757279992103577
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,128,2,0,0.031999999284744264
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,128,4,0,0.023868800699710847
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,128,8,0,0.023131200671195985
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,128,16,0,0.023078399896621703
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,128,32,0,0.022358399629592896
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,256,1,0,0.09620800018310546
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,256,2,0,0.05379679799079895
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,256,4,0,0.02922239899635315
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,256,8,0,0.021022400259971617
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,256,16,0,0.0204927995800972
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,256,32,0,0.020411199331283568
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,256,64,0,0.019934399425983428
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,256,1,0,0.10578240156173706
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,256,2,0,0.06404160261154175
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,256,4,0,0.03697440028190613
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,256,8,0,0.028143998980522156
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,256,16,0,0.02741439938545227
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,256,32,0,0.027107200026512145
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,256,64,0,0.025987198948860167
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,512,1,0,0.22334721088409423
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,512,2,0,0.12092640399932861
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,512,4,0,0.06681439876556397
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,512,8,0,0.03681440055370331
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,512,16,0,0.033932799100875856
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,512,8,0,0.046035200357437134
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,512,16,0,0.04076800048351288
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,512,32,0,0.03706879913806915
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,512,64,0,0.029705598950386047
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,512,1,0,0.2344048023223877
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,512,2,0,0.13021279573440553
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,512,4,0,0.07661920189857482
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,512,32,0,0.04561919867992401
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,512,64,0,0.03710240125656128
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,1024,1,0,0.6290063858032227
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,1024,2,0,0.32497920989990237
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,1024,1,0,0.6536320209503174
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,1024,4,0,0.17996640205383302
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,1024,8,0,0.0926367998123169
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,1024,16,0,0.05262399911880493
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,1024,32,0,0.04887360036373138
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,1024,64,0,0.04613119959831238
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,1024,2,0,0.33603360652923586
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,1024,4,0,0.18197120428085328
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,1024,8,0,0.10582079887390136
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,1024,16,0,0.06375679969787598
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,1024,32,0,0.05861120223999024
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,1024,64,0,0.05678079724311828
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,1536,1,0,1.2319935798645019
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,1536,2,0,0.6345376014709473
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,1536,4,0,0.32279040813446047
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,1536,16,0,0.09990400075912476
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,1536,8,0,0.17044639587402344
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,1536,32,0,0.06639040112495423
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,1536,16,0,0.11257120370864868
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,1536,32,0,0.07839360237121581
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,1536,64,0,0.07187039852142334
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,1536,1,0,1.2302864074707032
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,1536,2,0,0.6429903984069825
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,1536,4,0,0.3378175973892212
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,1536,8,0,0.1833184003829956
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,2048,16,0,0.14856480360031127
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,1536,64,0,0.08617759943008423
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,2048,1,0,2.287473678588867
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,2048,2,0,1.0271552085876465
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,2048,4,0,0.5239327907562256
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,2048,8,0,0.27291359901428225
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,2048,32,0,0.08350080251693726
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,2048,64,0,0.07929120063781739
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,2048,64,0,0.09411839842796325
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,2048,1,0,2.003495979309082
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,2048,2,0,1.1150912284851073
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,2048,4,0,0.55491042137146
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,2048,8,0,0.2857487916946411
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,2048,16,0,0.16287039518356322
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,2048,32,0,0.09872959852218628
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,3072,1,0,4.359110260009766
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,3072,2,0,2.2008432388305663
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,3072,4,0,1.0795503616333009
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,3072,8,0,0.6154704093933105
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,3072,16,0,0.3059135913848877
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,3072,32,0,0.16671359539031982
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,3072,64,0,0.1141327977180481
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,3072,1,0,4.3262176513671875
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,3072,2,0,2.109027290344238
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,3072,4,0,1.0867183685302735
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,3072,8,0,0.5641392230987549
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,3072,16,0,0.3220848083496094
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,3072,64,0,0.1338495969772339
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,3072,32,0,0.19143359661102294
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,4096,1,0,7.93749771118164
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,4096,2,0,3.819839859008789
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,4096,4,0,1.8362720489501954
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,4096,8,0,0.9280287742614746
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,4096,16,0,0.4805391788482666
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,4096,32,0,0.281443190574646
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,4096,64,0,0.14729599952697753
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,4096,1,0,8.702297973632813
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,4096,4,0,1.82740478515625
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,4096,2,0,3.7121200561523438
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,4096,8,0,0.9284607887268066
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,4096,16,0,0.4999120235443115
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,4096,32,0,0.2805808067321777
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,4096,64,0,0.17468160390853882
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,6144,2,0,10.073108673095703
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,6144,4,0,4.26135368347168
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,6144,1,0,17.40718994140625
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,6144,8,0,1.966379165649414
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,6144,16,0,1.0018207550048828
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,6144,32,0,0.5294623851776123
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,6144,64,0,0.30913119316101073
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,6144,2,0,9.636182403564453
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,6144,1,0,19.401336669921875
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,6144,8,0,1.9829088211059571
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,6144,4,0,4.030920028686523
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,6144,16,0,1.0169872283935546
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,6144,32,0,0.547599983215332
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,6144,64,0,0.3391279935836792
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,8192,2,0,17.865841674804688
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,8192,8,0,3.674982452392578
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,8192,4,0,8.9749267578125
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,8192,1,0,30.458447265625
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,8192,16,0,1.7545152664184571
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,8192,2,0,16.94146270751953
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,8192,32,0,0.8963616371154786
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,8192,64,0,0.47696962356567385
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,8192,4,0,7.313910675048828
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,8192,8,0,4.020558547973633
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,8192,1,0,29.14864501953125
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,8192,16,0,1.7470079421997071
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,8192,32,0,0.9153583526611329
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,8192,64,0,0.5385039806365967
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,10240,1,0,47.557394409179686
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,10240,4,0,11.775881958007812
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,10240,8,0,6.817654418945312
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,10240,2,0,23.167547607421874
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,10240,16,0,2.813870429992676
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,10240,32,0,1.376923179626465
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,10240,2,0,22.213526916503906
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,10240,64,0,0.7404096126556396
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,10240,8,0,6.367367935180664
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,10240,4,0,11.343798065185547
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,10240,16,0,2.708692741394043
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,10240,32,0,1.3855199813842773
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,10240,64,0,0.7743696212768555
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,10240,1,0,53.1739013671875
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,12288,4,0,19.935365295410158
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,12288,16,0,3.942776107788086
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,12288,8,0,8.468637084960937
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,12288,2,0,33.96201782226562
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,12288,32,0,1.9575935363769532
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,12288,64,0,1.0186688423156738
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,12288,16,0,3.9286945343017576
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,12288,1,0,67.7067626953125
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,12288,8,0,8.014134216308594
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,12288,4,0,15.93297119140625
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,12288,32,0,1.981585693359375
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,12288,2,0,38.890438842773435
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,12288,64,0,1.050971221923828
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,12288,1,0,76.17623291015624
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,16384,16,0,7.484337615966797
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,16384,8,0,14.664425659179688
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,16384,32,0,4.1819313049316404
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,16384,4,0,29.885562133789062
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,16384,64,0,1.7915359497070313
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,16384,2,0,59.8896484375
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,16384,1,0,121.74871826171875
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,16384,4,0,28.002902221679687
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,16384,16,0,7.122727966308593
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,16384,8,0,16.872691345214843
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,16384,32,0,3.480428695678711
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,16384,2,0,57.93009033203125
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,16384,64,0,1.7580671310424805
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,16384,1,0,134.57108154296876
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,32768,64,0,7.286558532714844
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,32768,32,0,14.93969268798828
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,32768,16,0,29.943118286132812
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,32768,8,0,71.25531616210938
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,32768,8,0,57.543829345703124
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,32768,4,0,121.1419921875
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,32768,16,0,33.51268310546875
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,32768,64,0,7.191358184814453
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,1,1,0,0.013372799754142762
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,32768,32,0,14.343798828125
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,1,2,0,0.013262400031089782
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,1,4,0,0.012508800625801087
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,32768,4,0,114.69129638671875
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,1,8,0,0.012329600006341934
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,1,16,0,0.01241919994354248
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,1,32,0,0.012148799747228623
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,1,64,0,0.012244799733161926
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,1,1,0,0.01918720006942749
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,1,2,0,0.0186256006360054
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,1,4,0,0.018012799322605133
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,1,8,0,0.017903999984264375
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,1,16,0,0.017504000663757326
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,1,32,0,0.017566399276256563
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,32768,2,0,233.8378173828125
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,1,64,0,0.017510400712490083
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,16,1,0,0.01590079963207245
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,16,2,0,0.015204800665378571
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,16,4,0,0.015238399803638458
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,16,8,0,0.013918399810791016
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,16,16,0,0.013678400218486786
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,16,32,0,0.013512000441551208
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,32768,2,0,225.4458740234375
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,16,64,0,0.012887999415397644
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,16,1,0,0.02232639938592911
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,16,2,0,0.02125120013952255
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,16,4,0,0.020921599864959717
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,16,8,0,0.019787199795246124
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,16,16,0,0.01964160054922104
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,16,32,0,0.019215999543666838
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,16,64,0,0.018756799399852753
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,32,1,0,0.02375040054321289
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,32,2,0,0.01589599996805191
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,32,4,0,0.014868800342082978
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,32,8,0,0.014934399724006652
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,32,16,0,0.01422560065984726
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,32,32,0,0.01390720009803772
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,32,64,0,0.01363999992609024
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,32,1,0,0.03315039873123169
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,32,32,0,0.019700799882411957
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,32,2,0,0.022299200296401978
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,32,4,0,0.02147520035505295
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,32,8,0,0.021380800008773803
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,32,16,0,0.020924800634384157
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,64,1,0,0.044566398859024046
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,32,64,0,0.01942719966173172
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,64,2,0,0.02463040053844452
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,64,4,0,0.01605760008096695
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,64,8,0,0.015694400668144225
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,64,16,0,0.01579200029373169
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,64,32,0,0.014372800290584565
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,64,64,0,0.01408960074186325
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,64,1,0,0.0548687994480133
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,64,2,0,0.033345600962638854
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,64,4,0,0.02301120012998581
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,64,8,0,0.022382399439811705
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,64,16,0,0.021792000532150267
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,64,32,0,0.021380800008773803
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,64,64,0,0.020311999320983886
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,128,1,0,0.08373919725418091
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,32768,1,0,480.999169921875
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,128,2,0,0.04747839868068695
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,128,4,0,0.02860639989376068
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,128,8,0,0.01828960031270981
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,128,16,0,0.017689600586891174
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,128,32,0,0.017497600615024568
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,128,64,0,0.016972799599170686
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,128,1,0,0.093505597114563
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,128,2,0,0.05817279815673828
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,128,4,0,0.036852800846099855
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,128,8,0,0.024742400646209715
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,128,16,0,0.023928000032901763
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,128,32,0,0.024081599712371827
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,128,64,0,0.023310400545597076
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,256,1,0,0.17564959526062013
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,256,2,0,0.09581279754638672
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,256,4,0,0.05494239926338196
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,256,8,0,0.032974401116371156
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,32768,1,0,533.135595703125
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,256,16,0,0.022470399737358093
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,256,32,0,0.022316800057888032
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,256,64,0,0.021998399496078493
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,256,1,0,0.1846943974494934
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,256,2,0,0.10537760257720948
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,256,4,0,0.06527519822120667
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,256,8,0,0.04258559942245484
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,256,16,0,0.029785600304603577
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,512,16,0,0.053467202186584475
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,256,32,0,0.02916640043258667
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,256,64,0,0.029172798991203307
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,512,1,0,0.4270944118499756
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,512,1,0,0.44909281730651857
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,512,2,0,0.22640159130096435
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,512,4,0,0.12138559818267822
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,512,8,0,0.06924639940261841
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,512,32,0,0.0364544004201889
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,512,64,0,0.039611199498176576
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,512,2,0,0.23884000778198242
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,512,4,0,0.1333616018295288
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,512,8,0,0.08158879876136779
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,512,16,0,0.0664896011352539
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,512,32,0,0.04660640060901642
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,512,64,0,0.050695997476577756
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,1024,1,0,1.3177167892456054
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,1024,2,0,0.6340767860412597
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,1024,4,0,0.33130559921264646
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,1024,8,0,0.1748271942138672
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,1024,16,0,0.10177279710769653
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,1024,32,0,0.0778976023197174
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,1024,64,0,0.05480480194091797
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,1024,1,0,1.2470687866210937
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,1024,2,0,0.6628511905670166
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,1024,4,0,0.3439743995666504
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,1024,8,0,0.19145280122756958
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,1024,16,0,0.1146016001701355
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,1536,8,0,0.32823359966278076
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,1024,32,0,0.09475839734077454
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,1024,64,0,0.06905919909477234
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,1536,1,0,2.4689472198486326
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,1536,2,0,1.2510448455810548
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,1536,4,0,0.6751232147216797
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,1536,16,0,0.1802288055419922
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,1536,32,0,0.12157119512557983
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,1536,64,0,0.0871999979019165
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,1536,1,0,2.8010208129882814
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,1536,2,0,1.2587871551513672
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,2048,1,0,4.227601623535156
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,1536,4,0,0.6511472225189209
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,1536,8,0,0.3468991994857788
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,1536,16,0,0.20227999687194825
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,1536,32,0,0.14126559495925903
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,1536,64,0,0.10825120210647583
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,2048,2,0,2.3644575119018554
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,2048,4,0,1.040447998046875
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,2048,8,0,0.5324927806854248
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,2048,16,0,0.2849152088165283
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,2048,32,0,0.16080479621887206
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,2048,64,0,0.1306175947189331
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,2048,1,0,4.148559951782227
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,2048,2,0,2.058681678771973
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,2048,4,0,1.13438720703125
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,2048,8,0,0.552620792388916
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,3072,2,0,4.44531364440918
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,2048,16,0,0.3063951969146729
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,2048,32,0,0.1838304042816162
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,2048,64,0,0.15203839540481567
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,3072,4,0,2.202859115600586
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,3072,1,0,10.748136138916015
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,3072,16,0,0.5686560153961182
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,3072,8,0,1.2318047523498534
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,3072,32,0,0.3148303985595703
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,3072,64,0,0.20836479663848878
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,3072,2,0,4.440406417846679
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,3072,1,0,8.90127182006836
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,3072,4,0,2.1648832321166993
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,3072,8,0,1.1712783813476562
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,3072,16,0,0.5922592163085938
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,3072,32,0,0.35751359462738036
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,3072,64,0,0.23904640674591066
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,4096,4,0,3.87216796875
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,4096,2,0,7.962057495117188
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,4096,1,0,15.792036437988282
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,4096,8,0,1.882316780090332
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,4096,8,0,1.830241584777832
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,4096,32,0,0.5092832088470459
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,4096,16,0,1.0400208473205566
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,4096,64,0,0.30530080795288084
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,4096,1,0,15.339152526855468
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,4096,2,0,7.644468688964844
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,4096,4,0,3.670988845825195
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,4096,16,0,0.9709008216857911
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,4096,32,0,0.5414000034332276
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,4096,64,0,0.3232367992401123
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,6144,4,0,8.548617553710937
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,6144,2,0,20.79114685058594
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,6144,8,0,4.14300651550293
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,6144,16,0,1.9965551376342774
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,6144,32,0,1.0469391822814942
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,6144,64,0,0.5857600212097168
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,6144,1,0,41.844241333007815
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,6144,4,0,9.735919952392578
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,6144,2,0,16.821395874023438
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,6144,8,0,4.149363327026367
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,6144,16,0,2.0128768920898437
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,6144,32,0,1.076416015625
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,6144,1,0,33.47462463378906
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,6144,64,0,0.6304944038391114
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,8192,4,0,15.351826477050782
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,8192,8,0,7.558916473388672
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,8192,16,0,3.6469329833984374
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,8192,32,0,1.818979263305664
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,8192,64,0,0.9634112358093262
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,8192,2,0,36.90980834960938
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,8192,8,0,7.223265838623047
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,8192,1,0,72.1724365234375
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,8192,16,0,3.6005455017089845
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,8192,4,0,14.730940246582032
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,8192,2,0,29.168829345703124
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,8192,32,0,1.842081642150879
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,8192,64,0,1.1171504020690919
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,8192,1,0,59.451995849609375
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,10240,4,0,23.421060180664064
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,10240,16,0,5.823739242553711
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,10240,8,0,11.917027282714844
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,10240,64,0,1.6424240112304687
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,10240,32,0,2.786800003051758
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,10240,2,0,55.96749267578125
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,10240,8,0,11.44589614868164
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,10240,4,0,22.717718505859374
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,10240,16,0,6.460887908935547
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,10240,32,0,2.859231948852539
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,10240,1,0,93.78356323242187
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,10240,2,0,45.68960876464844
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,10240,64,0,1.6519615173339843
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,10240,1,0,92.11719360351563
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,12288,16,0,8.253660583496094
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,12288,32,0,4.0457313537597654
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,12288,8,0,17.13758239746094
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,12288,64,0,2.317134475708008
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,12288,4,0,41.19889526367187
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,12288,16,0,9.482244873046875
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,12288,2,0,68.79277954101562
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,12288,32,0,4.745830535888672
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,12288,8,0,16.38670654296875
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,12288,64,0,2.3125247955322266
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,12288,4,0,31.916781616210937
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,12288,2,0,64.41631469726562
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,12288,1,0,137.7789306640625
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,16384,32,0,7.233243560791015
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,16384,16,0,14.918045043945312
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,16384,64,0,3.7272464752197267
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,16384,8,0,34.91331176757812
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,16384,4,0,61.12838134765625
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,12288,1,0,132.02601318359376
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,16384,32,0,7.313201904296875
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,16384,16,0,14.475033569335938
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,16384,8,0,33.367669677734376
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,16384,64,0,3.6883296966552734
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,1,1,0,0.013515199720859527
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,1,2,0,0.013184000551700593
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,16384,4,0,58.15843505859375
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,1,4,0,0.012646399438381195
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,1,8,0,0.01223199963569641
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,1,16,0,0.01228640004992485
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,16384,2,0,143.68814697265626
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,1,32,0,0.012353599816560746
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,1,64,0,0.012303999811410903
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,1,1,0,0.019465599954128266
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,1,16,0,0.017980800569057466
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,1,2,0,0.01881439983844757
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,16384,2,0,114.4468505859375
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,1,4,0,0.018294399976730345
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,1,8,0,0.018323199450969697
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,1,32,0,0.018432000279426576
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,1,64,0,0.017977599799633027
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,16,1,0,0.02401120066642761
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,16,2,0,0.016113600134849547
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,16,4,0,0.015195199847221374
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,16,8,0,0.015203200280666351
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,16,16,0,0.0143312007188797
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,16384,1,0,243.062353515625
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,16,32,0,0.013704000413417817
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,16,64,0,0.0131632000207901
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,16,1,0,0.03301439881324768
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,16,2,0,0.022308799624443054
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,32,1,0,0.044670400023460385
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,16,4,0,0.02194399982690811
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,16,8,0,0.02110079973936081
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,16384,1,0,226.9685791015625
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,16,16,0,0.02017119973897934
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,16,32,0,0.019593599438667297
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,16,64,0,0.018955199420452117
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,32,2,0,0.024139200150966645
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,32,4,0,0.01619359999895096
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,32,8,0,0.015707199275493623
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,32,16,0,0.015382400155067444
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,32,32,0,0.014375999569892883
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,32,1,0,0.054771202802658084
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,32,64,0,0.019862399995326997
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,32,64,0,0.014379200339317322
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,32,2,0,0.032579201459884646
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,32,4,0,0.022694399952888487
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,32,8,0,0.021792000532150267
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,32,16,0,0.02159679979085922
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,32,32,0,0.020443199574947356
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,64,1,0,0.07889919877052307
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,64,2,0,0.04548160135746002
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,64,4,0,0.025038400292396547
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,64,8,0,0.01722400039434433
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,64,16,0,0.016313600540161132
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,64,32,0,0.016073599457740784
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,64,64,0,0.015494400262832641
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,64,1,0,0.08932160139083863
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,64,2,0,0.055667197704315184
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,64,4,0,0.03247840106487274
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,64,8,0,0.023712000250816344
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,64,16,0,0.02327519953250885
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,64,32,0,0.022694399952888487
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,64,64,0,0.021663999557495116
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,128,1,0,0.15184160470962524
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,128,2,0,0.08426880240440368
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,128,2,0,0.09398720264434815
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,128,4,0,0.04916639924049378
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,128,8,0,0.02866399884223938
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,128,16,0,0.019940799474716185
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,128,32,0,0.01928640007972717
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,128,64,0,0.01948480010032654
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,128,1,0,0.1628383994102478
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,128,4,0,0.05991680026054382
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,128,8,0,0.037808001041412354
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,128,16,0,0.02654399871826172
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,128,32,0,0.026123198866844177
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,128,64,0,0.026364800333976746
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,256,1,0,0.3316879987716675
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,256,2,0,0.17804960012435914
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,256,4,0,0.0978767991065979
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,256,8,0,0.058601599931716916
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,256,16,0,0.036817601323127745
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,256,32,0,0.02561439871788025
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,256,64,0,0.02550080120563507
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,256,1,0,0.3426736116409302
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,256,2,0,0.18956960439682008
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,256,4,0,0.11040960550308228
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,256,8,0,0.07031679749488831
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,256,16,0,0.05025280117988586
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,256,32,0,0.03550559878349304
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,256,64,0,0.03496319949626923
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,512,1,0,0.8324720382690429
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,512,2,0,0.4386879920959473
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,512,4,0,0.23458399772644042
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,512,8,0,0.1289471983909607
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,512,16,0,0.09328960180282593
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,512,32,0,0.06369439959526062
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,512,64,0,0.04271360039710999
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,512,1,0,0.8474592208862305
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,512,2,0,0.45135841369628904
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,512,4,0,0.24948480129241943
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,512,8,0,0.14391839504241943
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,512,16,0,0.10960960388183594
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,512,32,0,0.07934880256652832
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,512,64,0,0.05721759796142578
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,1024,1,0,2.4854639053344725
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,1024,2,0,1.2552047729492188
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,1024,4,0,0.6456639766693115
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,1024,8,0,0.33949439525604247
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,1024,16,0,0.19256800413131714
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,1024,32,0,0.14001599550247193
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,1024,64,0,0.09529280066490173
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,1024,1,0,2.504684829711914
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,1024,4,0,0.669152021408081
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,1024,2,0,1.2675567626953126
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,1024,8,0,0.35940799713134763
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,1024,16,0,0.21114881038665773
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,1024,32,0,0.16081440448760986
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,1024,64,0,0.12033120393753052
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,1536,1,0,5.096635055541992
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,1536,2,0,2.5154703140258787
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,1536,4,0,1.2630703926086426
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,1536,8,0,0.6449007987976074
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,1536,16,0,0.3452608108520508
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,1536,32,0,0.22752799987792968
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,1536,64,0,0.15667200088500977
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,1536,1,0,5.884348678588867
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,1536,2,0,2.5152368545532227
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,1536,4,0,1.291102409362793
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,1536,8,0,0.6721983909606933
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,1536,16,0,0.37634079456329345
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,1536,32,0,0.25691039562225343
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,1536,64,0,0.1869488000869751
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,2048,1,0,8.851817321777343
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,2048,2,0,4.841457748413086
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,2048,4,0,2.0944192886352537
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,2048,8,0,1.0579232215881347
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,2048,2,0,4.25249137878418
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,2048,16,0,0.5588287830352783
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,2048,32,0,0.31084160804748534
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,2048,64,0,0.22278079986572266
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,2048,1,0,9.748612976074218
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,2048,4,0,2.3562240600585938
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,2048,16,0,0.5973152160644531
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,2048,8,0,1.0797247886657715
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,2048,32,0,0.3526511907577515
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,2048,64,0,0.26117119789123533
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,3072,4,0,5.249723052978515
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,3072,2,0,9.2130126953125
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,3072,16,0,1.1287471771240234
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,3072,8,0,2.556879997253418
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,3072,1,0,18.727088928222656
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,3072,32,0,0.6053599834442138
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,3072,64,0,0.3898736000061035
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,3072,8,0,2.187153625488281
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,3072,4,0,4.3623615264892575
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,3072,16,0,1.2761679649353028
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,3072,2,0,10.343004608154297
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,3072,1,0,17.860565185546875
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,3072,32,0,0.656931209564209
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,3072,64,0,0.44164161682128905
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,4096,16,0,2.142862319946289
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,4096,8,0,4.5778766632080075
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,4096,4,0,7.865058898925781
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,4096,32,0,1.0109999656677247
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,4096,64,0,0.5586351871490478
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,4096,2,0,16.288587951660155
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,4096,32,0,1.0532416343688964
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,4096,1,0,32.176980590820314
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,4096,8,0,3.7239009857177736
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,4096,4,0,9.013668823242188
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,4096,16,0,1.9393024444580078
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,4096,2,0,17.796311950683595
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,4096,64,0,0.6307328224182129
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,4096,1,0,30.394985961914063
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,6144,16,0,4.2427726745605465
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,6144,32,0,2.404128074645996
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,6144,8,0,10.290052795410157
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,6144,64,0,1.1474287986755372
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,6144,4,0,17.379135131835938
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,6144,2,0,35.79271545410156
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,6144,8,0,9.82303237915039
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,6144,32,0,2.1405391693115234
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,6144,4,0,19.708937072753905
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,6144,16,0,4.223880004882813
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,6144,64,0,1.228769588470459
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,6144,2,0,33.612939453125
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,6144,1,0,71.148046875
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,8192,16,0,7.737448120117188
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,8192,8,0,15.567768859863282
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,8192,32,0,3.77410888671875
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,8192,64,0,1.9491472244262695
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,6144,1,0,66.74751586914063
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,8192,4,0,36.316348266601565
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,8192,32,0,4.264988708496094
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,8192,16,0,7.39764175415039
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,8192,8,0,14.898768615722656
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,8192,2,0,60.88546142578125
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,8192,64,0,2.209382438659668
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,8192,4,0,29.534085083007813
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,16,1,1,0,0.014707200229167938
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,1,2,0,0.013275200128555298
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,1,4,0,0.012961600720882416
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,1,8,0,0.012390399724245072
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,8192,2,0,59.53638916015625
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,1,16,0,0.012520000338554382
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,1,32,0,0.012561599910259246
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,1,64,0,0.012481600046157837
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,16,1,1,0,0.020347200334072113
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,8192,1,0,119.523193359375
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,1,2,0,0.01900160014629364
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,1,4,0,0.018436799943447112
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,8192,1,0,146.85108642578126
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,1,8,0,0.018243199586868285
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,1,16,0,0.01785600036382675
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,1,32,0,0.01825920045375824
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,1,64,0,0.018006399273872375
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,16,16,1,0,0.04625760018825531
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,16,2,0,0.024966399371623992
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,16,4,0,0.016139200329780577
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,16,8,0,0.015436799824237823
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,16,16,0,0.015433600544929505
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,16,32,0,0.014342400431632995
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,16,64,0,0.01371839940547943
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,16,16,1,0,0.05448480248451233
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,16,2,0,0.03251200020313263
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,16,4,0,0.022907200455665588
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,16,8,0,0.021855999529361726
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,16,16,0,0.02218399941921234
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,16,32,0,0.021110400557518005
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,16,64,0,0.020151999592781068
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,16,32,1,0,0.07832639813423156
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,32,2,0,0.04568960070610047
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,32,8,0,0.017084799706935883
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,32,4,0,0.024702399969100952
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,32,16,0,0.016331200301647187
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,32,32,0,0.016390399634838106
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,32,64,0,0.015219199657440185
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,16,32,1,0,0.08856639862060547
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,32,2,0,0.055587202310562134
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,16,64,1,0,0.1403439998626709
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,32,4,0,0.03407999873161316
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,32,8,0,0.023958399891853333
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,32,16,0,0.023119999468326567
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,32,32,0,0.02305919975042343
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,32,64,0,0.02125120013952255
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,64,2,0,0.07987359762191773
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,64,4,0,0.04670720100402832
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,64,4,0,0.056888002157211306
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,64,8,0,0.026124799251556398
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,64,16,0,0.02574400007724762
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,64,16,0,0.018592000007629395
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,64,32,0,0.018086400628089905
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,64,64,0,0.01807519942522049
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,16,64,1,0,0.1518944025039673
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,64,2,0,0.08985599875450134
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,64,8,0,0.03585120141506195
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,64,32,0,0.025176000595092774
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,64,64,0,0.02481119930744171
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,16,128,1,0,0.2864687919616699
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,128,2,0,0.15281120538711548
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,128,4,0,0.08605599999427796
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,128,2,0,0.16552480459213256
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,128,8,0,0.05275840163230896
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,128,16,0,0.032441601157188416
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,128,32,0,0.022935999929904936
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,128,64,0,0.022732800245285033
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,16,128,1,0,0.2967344045639038
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,128,4,0,0.0979423999786377
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,128,8,0,0.06425600051879883
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,128,16,0,0.045023998618125914
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,128,32,0,0.032436800003051755
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,128,64,0,0.03177599906921387
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,16,256,1,0,0.6388847827911377
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,256,2,0,0.33918399810791017
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,256,4,0,0.18352799415588378
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,256,8,0,0.10255520343780518
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,256,4,0,0.1989024043083191
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,256,16,0,0.06526079773902893
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,256,32,0,0.0446399986743927
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,256,64,0,0.03188000023365021
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,16,256,1,0,0.6614880084991455
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,16,512,1,0,1.6523183822631835
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,256,2,0,0.354748797416687
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,256,8,0,0.11801279783248901
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,256,16,0,0.08138399720191955
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,256,32,0,0.06141120195388794
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,256,64,0,0.04555520117282867
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,512,2,0,0.8658927917480469
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,512,4,0,0.4692495822906494
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,512,8,0,0.24161438941955565
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,512,16,0,0.1692080020904541
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,512,32,0,0.1079327940940857
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,512,64,0,0.07772319912910461
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,16,512,1,0,1.6664447784423828
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,512,2,0,0.8804368019104004
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,512,4,0,0.479472017288208
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,512,8,0,0.2672287940979004
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,512,16,0,0.1943295955657959
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,512,32,0,0.13194719552993775
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,512,64,0,0.1016495943069458
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,16,1024,1,0,5.879099273681641
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,1024,2,0,2.538924789428711
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,16,1024,1,0,5.026144027709961
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,1024,4,0,1.2803888320922852
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,1024,8,0,0.6684016227722168
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,1024,16,0,0.38506081104278567
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,1024,32,0,0.25029759407043456
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,1024,64,0,0.16517119407653807
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,1024,2,0,2.7501487731933594
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,1024,4,0,1.315056037902832
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,1024,8,0,0.7035840034484864
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,1024,16,0,0.4002336025238037
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,1024,32,0,0.2900799989700317
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,1024,64,0,0.20754239559173585
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,16,1536,1,0,10.599622344970703
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,1536,2,0,5.211779022216797
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,1536,4,0,2.7342575073242186
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,1536,8,0,1.280844783782959
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,1536,16,0,0.6805888175964355
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,1536,32,0,0.4305488109588623
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,1536,64,0,0.2786672115325928
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,1536,2,0,5.121467208862304
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,1536,64,0,0.3319135904312134
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,16,1536,1,0,11.956237030029296
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,1536,4,0,2.551513671875
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,1536,8,0,1.387771224975586
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,1536,16,0,0.7348207950592041
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,1536,32,0,0.4870192050933838
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,2048,2,0,8.76715545654297
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,2048,4,0,4.326916885375977
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,16,2048,1,0,17.823390197753906
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,2048,16,0,1.2309503555297852
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,2048,8,0,2.1257568359375
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,2048,32,0,0.6482607841491699
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,2048,64,0,0.4083888053894043
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,2048,4,0,4.208555221557617
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,2048,2,0,8.569627380371093
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,2048,8,0,2.1686416625976563
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,16,2048,1,0,16.828883361816406
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,2048,16,0,1.2619312286376954
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,2048,32,0,0.6755663871765136
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,2048,64,0,0.4911935806274414
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,3072,8,0,4.650057601928711
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,3072,16,0,2.225622367858887
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,3072,4,0,9.502581024169922
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,3072,2,0,18.640353393554687
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,3072,32,0,1.349068832397461
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,3072,64,0,0.739463996887207
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,16,3072,1,0,38.382373046875
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,3072,4,0,9.016130828857422
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,3072,2,0,18.273164367675783
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,3072,8,0,4.563865661621094
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,3072,16,0,2.2982383728027345
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,3072,32,0,1.4098591804504395
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,3072,64,0,0.8403264045715332
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,16,3072,1,0,42.47782897949219
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,4096,8,0,8.190902709960938
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,4096,16,0,3.9632720947265625
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,4096,4,0,16.339134216308594
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,4096,64,0,1.1050848007202148
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,4096,32,0,2.326328086853027
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,4096,2,0,32.145562744140626
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,16,4096,1,0,65.38803100585938
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,4096,8,0,7.860334777832032
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,4096,4,0,15.473677062988282
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,4096,16,0,3.9107166290283204
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,4096,2,0,31.117282104492187
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,4096,64,0,1.2287903785705567
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,4096,32,0,2.378124809265137
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,32,1,1,0,0.01549919992685318
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,1,2,0,0.014510400593280792
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,1,4,0,0.013462400436401368
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,1,8,0,0.013196800649166108
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,1,16,0,0.012838399410247803
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,1,32,0,0.01292639970779419
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,16,4096,1,0,72.91544799804687
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,1,64,0,0.01316159963607788
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,32,1,1,0,0.02098720073699951
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,1,2,0,0.02056799978017807
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,1,4,0,0.01926400065422058
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,1,8,0,0.018812799453735353
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,1,16,0,0.018828800320625304
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,1,32,0,0.018592000007629395
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,1,64,0,0.01884479969739914
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,32,16,1,0,0.07917760014533996
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,16,2,0,0.04571360051631927
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,16,4,0,0.025947201251983642
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,16,8,0,0.017667199671268462
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,16,16,0,0.016891199350357055
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,16,32,0,0.016355200111865996
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,16,64,0,0.015385599434375763
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,32,16,1,0,0.08948000073432923
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,16,2,0,0.05584160089492798
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,16,4,0,0.03239839971065521
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,16,8,0,0.02433439940214157
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,16,16,0,0.023737600445747374
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,16,32,0,0.022976000607013703
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,16,64,0,0.02197439968585968
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,32,4,0,0.04729120135307312
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,32,32,1,0,0.1420815944671631
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,32,2,0,0.0807695984840393
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,32,8,0,0.0265392005443573
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,32,16,0,0.0188511997461319
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,32,32,0,0.01847680062055588
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,32,64,0,0.018139199912548067
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,32,32,1,0,0.15302560329437256
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,32,2,0,0.09012479782104492
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,32,4,0,0.05772799849510193
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,32,8,0,0.035713601112365725
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,32,16,0,0.025889599323272706
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,32,32,0,0.025412800908088683
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,32,64,0,0.024881599843502043
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,32,64,1,0,0.2709968090057373
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,64,2,0,0.14801280498504638
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,64,4,0,0.08363519906997681
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,64,8,0,0.050616002082824706
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,64,16,0,0.030008000135421754
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,64,32,0,0.022142399847507478
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,64,64,0,0.021724799275398256
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,32,64,1,0,0.27676639556884763
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,64,2,0,0.15555200576782227
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,64,4,0,0.09569119811058044
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,64,8,0,0.0625823974609375
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,64,16,0,0.0430976003408432
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,64,32,0,0.03213439881801605
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,64,64,0,0.031220799684524535
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,32,128,1,0,0.5525712013244629
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,128,2,0,0.29161601066589354
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,128,4,0,0.15660159587860106
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,128,8,0,0.09261760115623474
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,128,16,0,0.05953279733657837
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,128,32,0,0.04075199961662292
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,128,64,0,0.02963840067386627
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,32,128,1,0,0.5651840209960938
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,128,2,0,0.30908639430999757
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,128,4,0,0.17157280445098877
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,128,8,0,0.10746079683303833
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,256,8,0,0.19104479551315307
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,128,16,0,0.07478560209274292
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,128,32,0,0.05591359734535217
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,128,64,0,0.04293439984321594
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,32,256,1,0,1.2581791877746582
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,256,2,0,0.6649775981903077
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,256,4,0,0.3580064058303833
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,256,16,0,0.11578559875488281
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,256,8,0,0.2145296096801758
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,256,32,0,0.07867040038108826
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,256,64,0,0.05901600122451782
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,32,256,1,0,1.3081888198852538
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,256,2,0,0.6884736061096192
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,256,4,0,0.3794127941131592
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,256,16,0,0.14053280353546144
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,256,32,0,0.10260000228881835
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,256,64,0,0.0825872004032135
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,32,512,1,0,3.284494400024414
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,32,512,1,0,3.3329696655273438
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,512,2,0,1.7154319763183594
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,512,4,0,0.9373760223388672
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,512,8,0,0.480134391784668
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,512,16,0,0.32283520698547363
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,512,32,0,0.20000159740447998
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,512,64,0,0.13580960035324097
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,512,4,0,0.9381600379943847
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,512,2,0,1.7384511947631835
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,512,8,0,0.5116464138031006
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,512,16,0,0.3667680025100708
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,512,32,0,0.23992319107055665
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,512,64,0,0.1760975956916809
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,1024,4,0,2.6084047317504884
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,1024,64,0,0.3114736080169678
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,1024,2,0,5.223737716674805
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,32,1024,1,0,10.562926483154296
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,1024,8,0,1.306436824798584
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,1024,16,0,0.7116896152496338
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,1024,32,0,0.499505615234375
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,1024,2,0,5.165702438354492
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,1024,4,0,2.823700714111328
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,32,1024,1,0,10.091795349121094
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,1024,8,0,1.3769392013549804
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,1024,16,0,0.7909535884857177
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,1024,32,0,0.5461232185363769
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,1024,64,0,0.3808896064758301
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,1536,4,0,6.119435119628906
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,1536,2,0,10.68282699584961
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,1536,8,0,2.521696090698242
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,32,1536,1,0,24.835443115234376
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,1536,16,0,1.3445839881896973
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,1536,32,0,0.843295955657959
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,1536,64,0,0.5161056041717529
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,1536,4,0,5.244862365722656
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,1536,8,0,2.6393903732299804
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,1536,16,0,1.5750816345214844
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,1536,2,0,11.862468719482422
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,1536,32,0,0.9438752174377442
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,32,1536,1,0,20.566915893554686
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,1536,64,0,0.6294095993041993
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,2048,4,0,8.953148651123048
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,2048,8,0,5.145145416259766
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,2048,2,0,17.957766723632812
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,2048,16,0,2.5064624786376952
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,2048,32,0,1.1978704452514648
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,2048,64,0,0.783569622039795
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,32,2048,1,0,35.47214965820312
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,2048,8,0,4.43243522644043
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,32,2048,1,0,34.7123779296875
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,1,2,0,0.01583999991416931
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,2048,4,0,10.082440185546876
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,2048,2,0,17.229994201660155
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,2048,16,0,2.5231056213378906
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,2048,32,0,1.3230287551879882
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,2048,64,0,0.9188015937805176
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,64,1,1,0,0.016872000694274903
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,1,4,0,0.014108799397945404
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,1,8,0,0.014079999923706055
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,1,16,0,0.014023999869823455
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,1,32,0,0.013787199556827546
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,1,64,0,0.013777600228786468
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,64,1,1,0,0.022438399493694305
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,1,2,0,0.02152000069618225
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,1,4,0,0.020017600059509276
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,1,8,0,0.020043200254440306
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,16,4,0,0.04816800057888031
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,1,16,0,0.019844800233840942
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,1,32,0,0.01973759979009628
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,1,64,0,0.01961439996957779
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,64,16,1,0,0.1446079969406128
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,16,2,0,0.08266720175743103
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,16,8,0,0.027793601155281067
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,16,4,0,0.05883359909057617
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,16,8,0,0.035892799496650696
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,16,16,0,0.019648000597953796
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,16,32,0,0.018796800076961516
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,16,64,0,0.017535999417304993
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,64,16,1,0,0.15315200090408326
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,16,2,0,0.09136319756507874
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,16,16,0,0.026984000205993654
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,16,32,0,0.025732800364494324
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,16,64,0,0.024801599979400634
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,64,32,1,0,0.2778208017349243
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,32,2,0,0.1517359972000122
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,32,2,0,0.15640159845352172
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,32,4,0,0.08542240262031556
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,32,8,0,0.051472002267837526
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,32,16,0,0.03045920133590698
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,32,32,0,0.02313600033521652
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,32,64,0,0.021966400742530822
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,64,32,1,0,0.2768095970153809
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,32,4,0,0.0959439992904663
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,32,8,0,0.06335840225219727
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,64,4,0,0.15311039686203004
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,32,16,0,0.04270719885826111
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,32,32,0,0.03285920023918152
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,32,64,0,0.03137759864330292
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,64,64,1,0,0.517796802520752
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,64,2,0,0.2729968070983887
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,64,8,0,0.09250400066375733
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,64,16,0,0.05788639783859253
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,64,32,0,0.039103999733924866
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,64,64,0,0.02903839945793152
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,64,64,1,0,0.5365664005279541
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,64,64,0,0.042742401361465454
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,64,2,0,0.2890448093414307
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,64,4,0,0.1663375973701477
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,64,8,0,0.10576000213623046
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,128,8,0,0.16974079608917236
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,128,16,0,0.10574400424957275
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,64,16,0,0.07306560277938842
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,64,32,0,0.0551360011100769
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,64,128,1,0,1.0836159706115722
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,128,2,0,0.57118239402771
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,128,4,0,0.32313439846038816
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,128,4,0,0.2980736017227173
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,128,32,0,0.07245759963989258
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,128,64,0,0.054953598976135255
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,64,128,1,0,1.1074048042297364
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,128,2,0,0.5955855846405029
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,128,8,0,0.19284640550613402
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,128,16,0,0.12938400506973266
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,128,32,0,0.09616960287094116
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,128,64,0,0.07780960202217102
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,64,256,1,0,2.745403289794922
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,256,2,0,1.3265968322753907
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,256,4,0,0.701364803314209
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,256,8,0,0.37162559032440184
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,256,16,0,0.22001760005950927
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,256,32,0,0.14451839923858642
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,256,64,0,0.10643199682235718
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,64,256,1,0,2.5348655700683596
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,256,2,0,1.3704336166381836
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,256,4,0,0.7414624214172363
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,256,8,0,0.4094064235687256
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,256,16,0,0.26001920700073244
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,256,32,0,0.18402719497680664
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,512,16,0,0.6238783836364746
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,256,64,0,0.14657119512557984
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,512,2,0,3.4488689422607424
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,64,512,1,0,6.835215759277344
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,512,4,0,1.918409538269043
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,512,8,0,0.9521488189697266
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,512,32,0,0.3778127908706665
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,512,64,0,0.2523184061050415
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,512,64,0,0.32845120429992675
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,64,512,1,0,6.56671371459961
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,512,2,0,3.473998260498047
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,512,4,0,1.846615982055664
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,512,8,0,1.0068191528320312
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,512,32,0,0.45372161865234373
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,512,16,0,0.7143104076385498
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,1024,4,0,5.265407943725586
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,64,1024,1,0,21.15699005126953
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,1024,8,0,2.6326351165771484
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,1024,2,0,10.650249481201172
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,1024,16,0,1.3898544311523438
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,1024,32,0,0.9238975524902344
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,1024,64,0,0.597057580947876
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,1024,8,0,2.754283142089844
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,1024,4,0,5.2948463439941404
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,1024,16,0,1.5428336143493653
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,1024,32,0,1.0596896171569825
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,1024,2,0,10.233702087402344
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,1024,64,0,0.7263951778411866
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,128,1,1,0,0.02784320116043091
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,1,2,0,0.017239999771118165
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,1,4,0,0.016143999993801117
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,64,1024,1,0,23.725125122070313
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,1,8,0,0.015897600352764128
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,1,16,0,0.015382400155067444
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,1,32,0,0.015699200332164764
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,1,64,0,0.01611520051956177
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,128,1,1,0,0.03302719891071319
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,1,2,0,0.02338079959154129
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,1,4,0,0.02197760045528412
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,1,8,0,0.021612800657749176
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,1,16,0,0.02143999934196472
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,1,32,0,0.02136960029602051
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,1,64,0,0.021196800470352172
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,128,16,1,0,0.2762768030166626
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,16,2,0,0.14683040380477905
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,16,4,0,0.08536159992218018
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,16,8,0,0.05193600058555603
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,16,16,0,0.03328480124473572
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,16,32,0,0.0232464000582695
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,16,64,0,0.02195200026035309
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,128,16,1,0,0.2767951965332031
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,16,2,0,0.1585536003112793
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,16,4,0,0.09748799800872802
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,16,8,0,0.0646016001701355
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,16,16,0,0.04469119906425476
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,16,32,0,0.03334720134735107
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,16,64,0,0.03231520056724548
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,128,32,1,0,0.518510389328003
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,32,2,0,0.2721440076828003
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,32,4,0,0.15382399559020996
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,32,8,0,0.09157440066337585
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,32,16,0,0.058259201049804685
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,32,32,0,0.03927519917488098
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,32,64,0,0.029686400294303895
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,128,32,1,0,0.5355455875396729
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,32,2,0,0.2906816005706787
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,32,4,0,0.16626720428466796
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,32,8,0,0.1074895977973938
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,32,16,0,0.0736464023590088
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,32,32,0,0.05623199939727783
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,32,64,0,0.043222400546073916
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,128,64,1,0,1.0930879592895508
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,64,2,0,0.5493408203125
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,64,4,0,0.29171841144561766
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,64,8,0,0.168123197555542
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,64,16,0,0.10690560340881347
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,64,32,0,0.07062399983406067
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,64,64,0,0.05342879891395569
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,128,64,1,0,1.0532464027404784
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,64,2,0,0.5729536056518555
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,64,4,0,0.30956480503082273
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,64,8,0,0.18922879695892333
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,64,16,0,0.12647039890289308
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,64,32,0,0.09448000192642211
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,64,64,0,0.07673919796943665
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,128,32,0,0.1335536003112793
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,128,128,1,0,2.1589616775512694
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,128,2,0,1.130244827270508
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,128,4,0,0.5874608039855957
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,128,8,0,0.32636799812316897
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,128,8,0,0.36619200706481936
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,128,16,0,0.19765440225601197
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,128,64,0,0.1007375955581665
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,128,128,1,0,2.2113344192504885
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,128,2,0,1.1656240463256835
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,128,4,0,0.6251599788665771
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,128,16,0,0.23907999992370604
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,128,32,0,0.17426879405975343
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,128,64,0,0.1408784031867981
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,128,256,1,0,5.015243148803711
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,256,2,0,2.643756866455078
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,256,4,0,1.392307186126709
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,256,8,0,0.7225743770599365
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,256,16,0,0.42420158386230467
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,256,32,0,0.2734976053237915
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,256,64,0,0.19691040515899658
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,128,256,1,0,5.652686309814453
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,256,2,0,2.697960090637207
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,256,4,0,1.472548770904541
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,256,8,0,0.7993311882019043
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,256,16,0,0.49707517623901365
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,512,4,0,3.5718704223632813
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,256,32,0,0.34811201095581057
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,256,64,0,0.27331199645996096
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,512,8,0,1.8445920944213867
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,512,2,0,8.097214508056641
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,128,512,1,0,13.748220825195313
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,512,16,0,1.267024040222168
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,512,64,0,0.4848320007324219
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,512,32,0,0.7284207820892334
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,128,512,1,0,13.398355102539062
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,512,4,0,3.7129135131835938
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,512,2,0,6.956486511230469
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,512,8,0,2.075209617614746
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,512,16,0,1.3905232429504395
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,512,32,0,0.8814240455627441
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,512,64,0,0.6263535976409912
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,256,1,32,0,0.022163200378417968
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,256,1,64,0,0.022382399439811705
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,256,1,1,0,0.049327999353408813
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,256,1,2,0,0.025923201441764833
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,256,1,4,0,0.023656000196933747
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,256,1,8,0,0.023131200671195985
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,256,1,16,0,0.022835199534893037
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,256,1,1,0,0.0562175989151001
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,256,1,2,0,0.03369440138339996
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,256,1,4,0,0.02943040132522583
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,256,1,8,0,0.02886880040168762
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,256,1,16,0,0.028303998708724975
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,256,1,32,0,0.0283376008272171
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,256,1,64,0,0.028519999980926514
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,256,16,1,0,0.5238656044006348
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,256,16,2,0,0.28210721015930174
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,256,16,4,0,0.15451040267944335
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,256,16,8,0,0.09293439984321594
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,256,16,16,0,0.05927039980888367
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,256,16,32,0,0.04045119881629944
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,256,16,64,0,0.035713601112365725
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,256,16,1,0,0.5302256107330322
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,256,16,2,0,0.29243199825286864
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,256,16,4,0,0.16836639642715454
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,256,16,8,0,0.10788480043411255
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,256,16,16,0,0.07502719759941101
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,256,16,32,0,0.054979199171066286
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,256,32,16,0,0.10541759729385376
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,256,32,32,0,0.07270560264587403
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,256,16,64,0,0.04907360076904297
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,256,32,1,0,1.0483103752136231
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,256,32,2,0,0.5315919876098633
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,256,32,4,0,0.2898911952972412
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,256,32,8,0,0.16729120016098023
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,256,32,64,0,0.053547197580337526
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,256,32,1,0,1.043246364593506
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,256,32,2,0,0.5614367961883545
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,256,32,4,0,0.31343679428100585
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,256,32,8,0,0.19141119718551636
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,256,32,16,0,0.12784479856491088
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,256,32,32,0,0.09588639736175537
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,256,32,64,0,0.07683039903640747
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,256,64,2,0,1.04890718460083
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,256,64,1,0,2.0140607833862303
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,256,64,4,0,0.567961597442627
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,256,64,8,0,0.32681119441986084
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,256,64,16,0,0.19842879772186278
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,256,64,32,0,0.13377439975738525
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,256,64,64,0,0.09997439980506898
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,256,64,1,0,2.072353553771973
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,256,64,2,0,1.0860976219177245
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,256,64,4,0,0.6020431995391846
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,256,64,8,0,0.3596911907196045
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,256,64,16,0,0.23764479160308838
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,256,64,32,0,0.17398560047149658
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,256,64,64,0,0.1394592046737671
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,256,128,1,0,4.662614440917968
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,256,128,2,0,2.2370512008666994
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,256,128,4,0,1.1497936248779297
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,256,128,8,0,0.634287977218628
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,256,128,16,0,0.38064160346984866
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,256,128,32,0,0.2523695945739746
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,256,128,64,0,0.18775199651718139
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,256,128,1,0,4.354734420776367
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,256,128,2,0,2.361555290222168
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,256,128,4,0,1.222976016998291
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,256,128,8,0,0.7092351913452148
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,32,256,256,4,0,2.942892837524414
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,128,256,256,1,0,10.120540618896484
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,256,128,16,0,0.4549520015716553
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,256,128,32,0,0.32723839282989503
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,256,128,64,0,0.26449439525604246
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,64,256,256,2,0,5.233975982666015
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,8,256,256,16,0,0.8273983955383301
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,16,256,256,8,0,1.4292863845825194
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,4,256,256,32,0,0.5425439834594726
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,float16,2,256,256,64,0,0.38334240913391116
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,256,256,8,0,1.5835231781005858
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,256,256,4,0,2.924569511413574
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,256,256,2,0,5.8290657043457035
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,256,256,1,0,10.054532623291015
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,256,256,16,0,0.9715456008911133
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,256,256,32,0,0.671780776977539
SGLang,0.5.6.post2,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,256,256,64,0,0.5252639770507812
