framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,1,1,0.01313440054655075
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,2,1,0.012622399628162384
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,1,1,0.01958400011062622
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,4,1,0.018931199610233308
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,16,1,0.01880960017442703
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,8,1,0.019088000059127808
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,32,1,0.018796800076961516
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,2,1,0.019019199907779692
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,1,3,0.013025599718093871
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1,1,64,1,0.018984000384807586
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,2,3,0.012558400630950928
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,4,3,0.012280000001192093
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,8,3,0.01241919994354248
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,16,3,0.012348800152540206
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,32,3,0.0124719999730587
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1,1,64,3,0.012388800084590913
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,1,3,0.019419200718402863
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,2,3,0.019679999351501463
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,4,3,0.019116799533367156
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,8,1,0.01247519999742508
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1,1,64,1,0.012412799894809723
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,8,3,0.018726399540901183
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,16,3,0.019332799315452575
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,4,1,0.012689599394798278
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,32,1,0.01239520013332367
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,16,1,0.01239679977297783
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,32,3,0.018991999328136444
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,1,7,0.01318880021572113
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1,1,64,3,0.018881599605083465
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,2,7,0.012678399682044983
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,1,7,0.01976799964904785
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,2,7,0.019152000546455383
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,4,7,0.012494400143623352
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,8,7,0.012380799651145935
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,16,7,0.01234079971909523
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,32,7,0.01234079971909523
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1,1,64,7,0.012308800220489502
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,4,7,0.018881599605083465
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,8,7,0.018969599902629853
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,16,7,0.01892160028219223
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,32,7,0.018943999707698823
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1,1,64,7,0.018987199664115904
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,1,15,0.013184000551700593
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,2,15,0.012732799351215362
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,4,15,0.012483199685811996
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,8,15,0.012436799705028534
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,16,15,0.012511999905109405
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,32,15,0.012596799433231354
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1,1,64,15,0.012459199875593185
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,1,15,0.019790400564670563
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,2,15,0.01913280040025711
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,4,15,0.018955199420452117
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,8,15,0.019017599523067474
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,16,15,0.01881439983844757
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,32,15,0.018935999274253844
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1,1,64,15,0.018943999707698823
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,1,31,0.013247999548912048
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,2,31,0.012836800515651703
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,4,31,0.012464000284671784
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,8,31,0.012408000230789185
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,16,31,0.01249919980764389
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,32,31,0.012401600182056428
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1,1,64,31,0.01244800016283989
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,1,31,0.019883200526237488
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,2,31,0.019361600279808044
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,4,31,0.019156800210475923
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,8,31,0.01892800033092499
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,16,31,0.019054399430751802
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,32,31,0.018993599712848662
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1,1,64,31,0.018875199556350707
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1,1,64,63,0.01242400035262108
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,1,63,0.013240000605583191
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,1,63,0.019670400023460387
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,2,63,0.012852799892425538
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,4,63,0.012547199428081513
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,8,63,0.012537600100040435
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,16,63,0.01239679977297783
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,32,63,0.01247519999742508
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,2,63,0.01945279985666275
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,2,127,0.014606399834156037
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,4,63,0.019116799533367156
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,8,63,0.018960000574588777
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,16,63,0.019036799669265747
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,32,63,0.01912800073623657
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1,1,64,63,0.018993599712848662
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,1,127,0.015121600031852723
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,4,127,0.014211200177669525
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,8,127,0.014259199798107147
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,16,127,0.014251199364662171
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,32,127,0.014230400323867798
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1,1,64,127,0.014228799939155578
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,1,127,0.021563200652599333
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,2,127,0.021147200465202333
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,4,127,0.020985600352287293
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,8,127,0.02077919989824295
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,16,127,0.020785599946975708
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,32,127,0.02086080014705658
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1,1,64,127,0.020614400506019592
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,1,255,0.0179967999458313
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,2,255,0.01759839951992035
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,4,255,0.017419199645519256
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,8,255,0.017403200268745422
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,16,255,0.017164799571037292
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,32,255,0.017451199889183044
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1,1,64,255,0.017360000312328337
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,1,255,0.0245728000998497
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,2,255,0.024195200204849242
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,4,255,0.02404319941997528
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,8,255,0.023955200612545014
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,16,255,0.02388159930706024
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,32,255,0.023894399404525757
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1,1,64,255,0.024060800671577454
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,1,511,0.018960000574588777
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,2,511,0.01791519969701767
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,4,511,0.01706400066614151
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,8,511,0.016774399578571318
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,16,511,0.017953599989414214
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,32,511,0.01806560009717941
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,1,511,0.025854399800300597
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1,1,64,511,0.01823839992284775
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,2,511,0.02484000027179718
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,4,511,0.024035200476646423
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,8,511,0.02378080040216446
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,16,511,0.025119999051094057
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,32,511,0.025198400020599365
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1,1,64,511,0.02498079985380173
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,1,1023,0.01958400011062622
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,2,1023,0.018131199479103088
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,4,1023,0.01740639954805374
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,8,1023,0.01701440066099167
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,16,1023,0.018060800433158875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,32,1023,0.018137599527835845
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1,1,64,1023,0.01796479970216751
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,1,1023,0.026876801252365114
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,2,1023,0.02553919851779938
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,4,1023,0.024587200582027437
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,8,1023,0.02442079931497574
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,16,1023,0.02543199956417084
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,32,1023,0.025536000728607178
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,16,2047,0.018211199343204497
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1,1,64,1023,0.0253711998462677
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,1,2047,0.02018879950046539
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,2,2047,0.018990400433540344
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,4,2047,0.01810719966888428
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,8,2047,0.01770399957895279
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,32,2047,0.018377600610256194
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1,1,64,2047,0.01841759979724884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,1,2047,0.02928000092506409
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,4,2047,0.027556800842285158
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,2,2047,0.028391999006271363
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,8,2047,0.027051201462745665
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,16,2047,0.027475199103355406
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,32,2047,0.027444800734519957
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1,1,64,2047,0.02741760015487671
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,1,4095,0.022563199698925018
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,2,4095,0.021044799685478212
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,4,4095,0.01998399943113327
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,8,4095,0.01993280053138733
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,16,4095,0.020735999941825865
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,32,4095,0.02069920003414154
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1,1,64,4095,0.020552000403404234
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,1,4095,0.034641599655151366
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,2,4095,0.033246400952339175
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,4,4095,0.031947198510169986
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,8,4095,0.03192479908466339
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,16,4095,0.03267680108547211
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,32,4095,0.03287839889526367
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1,1,64,4095,0.0327919989824295
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,1,8191,0.025846400856971742
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,2,8191,0.024220800399780272
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,4,8191,0.02282879948616028
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,8,8191,0.021827200055122377
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,16,8191,0.023868800699710847
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,32,8191,0.02430559992790222
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1,1,64,8191,0.024035200476646423
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,1,8191,0.04406720101833343
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,2,8191,0.04174880087375641
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,4,8191,0.040582400560379026
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,8,8191,0.04052959978580475
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,16,8191,0.04203679859638214
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,32,8191,0.04264479875564575
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1,1,64,8191,0.042331200838088986
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,1,16383,0.029891198873519896
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,2,16383,0.027102398872375488
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,4,16383,0.026276800036430358
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,8,16383,0.025820800662040712
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,16,16383,0.026096001267433167
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,32,16383,0.026212799549102783
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1,1,64,16383,0.026929599046707154
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,1,16383,0.06721280217170715
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,2,16383,0.06094080209732056
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,4,16383,0.058011198043823244
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,8,16383,0.05645120143890381
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,16,16383,0.056766402721405027
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,32,16383,0.056846398115158084
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1,1,64,16383,0.05671039819717407
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,1,32767,0.04288319945335388
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,2,32767,0.03865439891815185
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,4,32767,0.03322399854660034
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,8,32767,0.03172000050544739
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,4,32767,0.09981120228767396
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,16,32767,0.0311024010181427
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,32,32767,0.03075999915599823
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1,1,64,32767,0.030644801259040833
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,1,32767,0.10347679853439332
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,2,32767,0.10044959783554078
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,8,32767,0.09917920231819152
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,16,32767,0.09942079782485962
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,32,32767,0.09905279874801635
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1,1,64,32767,0.09899839758872986
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,1,65535,0.05735039710998535
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,2,65535,0.05307520031929016
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,4,65535,0.052691197395324706
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,8,65535,0.050697600841522215
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,16,65535,0.051767998933792116
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,32,65535,0.051095998287200926
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1,1,64,65535,0.050460797548294065
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,1,65535,0.17703039646148683
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,2,65535,0.17367360591888428
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,4,65535,0.17267839908599852
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,8,65535,0.17165759801864625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,16,65535,0.17178239822387695
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,32,65535,0.1717743992805481
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,1,131071,0.08912960290908814
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1,1,64,65535,0.1713696002960205
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,2,131071,0.08305439949035645
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,4,131071,0.0785215973854065
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,8,131071,0.07966560125350952
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,16,131071,0.07863039970397949
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,32,131071,0.07829279899597168
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1,1,64,131071,0.07840480208396912
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,1,131071,0.3188431978225708
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,2,131071,0.31600959300994874
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,4,131071,0.3151711940765381
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,8,131071,0.3134304046630859
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,16,131071,0.3134383916854858
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,32,131071,0.31296639442443847
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1,1,64,131071,0.31324479579925535
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,1,1,0.01318880021572113
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,2,1,0.0126351997256279
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,4,1,0.012436799705028534
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,8,1,0.012455999851226807
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,16,1,0.01241919994354248
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,32,1,0.012355200201272964
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,2,1,64,1,0.012511999905109405
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,1,1,0.019303999841213226
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,2,1,0.019041599333286287
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,4,1,0.018606400489807128
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,8,1,0.018539200723171233
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,16,1,0.018611200153827667
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,32,1,0.01867839992046356
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,2,1,64,1,0.0186271995306015
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,1,3,0.01318880021572113
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,2,3,0.012604799866676331
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,4,3,0.012598399817943574
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,8,3,0.01228799968957901
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,16,3,0.012377600371837615
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,32,3,0.012537600100040435
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,2,1,64,3,0.012439999729394913
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,1,3,0.019487999379634857
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,2,3,0.018982400000095368
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,4,3,0.018751999735832213
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,8,3,0.01868640035390854
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,16,3,0.018508799374103546
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,32,3,0.0186271995306015
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,2,1,64,3,0.018619200587272643
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,1,7,0.013115200400352477
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,2,7,0.01273919939994812
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,4,7,0.012540799379348756
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,8,7,0.012324800342321396
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,16,7,0.012441600114107132
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,32,7,0.012398400157690049
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,16,7,0.018729600310325622
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,32,7,0.018665599822998046
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,2,1,64,7,0.012374400347471236
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,1,7,0.019540800154209136
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,2,7,0.018964800238609313
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,4,7,0.01880960017442703
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,8,7,0.01854719966650009
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,2,1,64,7,0.01833920031785965
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,1,15,0.01311040073633194
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,2,15,0.012902399897575379
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,4,15,0.012627199292182922
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,8,15,0.012580800056457519
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,16,15,0.012452799826860428
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,32,15,0.012408000230789185
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,2,1,64,15,0.012603199481964112
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,1,15,0.019420799612998963
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,2,15,0.01897920072078705
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,4,15,0.01873439997434616
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,8,15,0.018796800076961516
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,16,15,0.01884479969739914
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,32,15,0.01884319931268692
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,2,1,64,15,0.01865919977426529
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,1,31,0.013156799972057343
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,2,31,0.01271039992570877
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,4,31,0.012520000338554382
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,8,31,0.012671999633312225
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,16,31,0.013475200533866883
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,32,31,0.012518399953842163
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,2,1,64,31,0.012577599287033081
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,1,31,0.019556799530982973
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,2,31,0.01929280012845993
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,4,31,0.018806399405002595
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,8,31,0.018771199882030486
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,16,31,0.01887200027704239
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,32,31,0.018675200641155243
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,1,63,0.013283200562000275
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,2,1,64,31,0.018619200587272643
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,2,63,0.012860800325870513
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,4,63,0.012547199428081513
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,8,63,0.0124719999730587
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,16,63,0.012535999715328216
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,32,63,0.01252799928188324
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,2,1,64,63,0.012604799866676331
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,1,63,0.019592000544071196
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,2,63,0.019023999571800232
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,4,63,0.018806399405002595
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,8,63,0.018812799453735353
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,16,63,0.018753600120544434
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,32,63,0.018739199638366698
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,2,1,64,63,0.0187376007437706
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,1,127,0.014961600303649902
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,2,1,64,127,0.014159999787807465
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,2,127,0.014555199444293976
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,4,127,0.014215999841690063
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,8,127,0.014120000600814819
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,16,127,0.014292800426483154
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,32,127,0.014156800508499146
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,1,127,0.021211199462413788
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,2,127,0.02059040069580078
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,4,127,0.02070080041885376
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,8,127,0.020326399803161622
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,16,127,0.020571200549602507
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,32,127,0.020443199574947356
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,2,1,64,127,0.020582400262355804
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,1,255,0.018052799999713896
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,2,255,0.017617599666118623
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,4,255,0.017403200268745422
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,8,255,0.017203199863433837
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,16,255,0.017292800545692443
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,32,255,0.017182399332523347
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,32,255,0.023761600255966187
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,2,1,64,255,0.01724800020456314
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,1,255,0.02452639937400818
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,2,255,0.023979200422763823
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,4,255,0.0237296000123024
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,8,255,0.023814399540424348
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,16,255,0.023630400002002717
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,2,1,64,255,0.023678399622440338
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,1,511,0.019014400243759156
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,2,511,0.01809599995613098
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,4,511,0.017535999417304993
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,8,511,0.017272000014781953
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,16,511,0.018001599609851836
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,32,511,0.018228800594806673
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,2,1,64,511,0.018137599527835845
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,1,511,0.02648639976978302
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,2,511,0.025193598866462708
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,4,511,0.024294400215148927
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,8,511,0.02417919933795929
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,16,511,0.025139200687408447
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,32,511,0.025209599733352663
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,2,1,64,511,0.025436800718307496
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,1,1023,0.019515199959278105
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,2,1023,0.018313600122928618
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,4,1023,0.017633600533008574
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,8,1023,0.017502400279045104
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,16,1023,0.018167999386787415
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,32,1023,0.018382400274276733
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,2,1,64,1023,0.018163199722766876
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,1,1023,0.029047998785972595
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,2,1023,0.027265599370002745
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,4,1023,0.026500800251960756
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,8,1023,0.026392000913619994
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,16,1023,0.027083200216293336
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,32,1023,0.02724800109863281
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,32,2047,0.01871519982814789
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,2,1,64,1023,0.027118399739265442
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,1,2047,0.020577600598335265
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,2,2047,0.019518400728702544
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,4,2047,0.018569600582122803
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,8,2047,0.03030399978160858
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,8,2047,0.01835840046405792
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,16,2047,0.018481600284576415
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,2,1,64,2047,0.01866080015897751
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,1,2047,0.0329584002494812
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,2,2047,0.03145439922809601
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,4,2047,0.03049120008945465
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,16,2047,0.030187198519706727
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,32,2047,0.030643200874328612
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,2,1,64,2047,0.03049759864807129
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,1,4095,0.024025599658489227
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,2,4095,0.021110400557518005
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,4,4095,0.02056480050086975
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,8,4095,0.019916799664497376
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,4,4095,0.03830080032348633
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,16,4095,0.02085919976234436
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,16,4095,0.038699200749397276
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,32,4095,0.02075680047273636
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,2,1,64,4095,0.020827199518680572
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,1,4095,0.04161919951438904
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,2,4095,0.03942559957504273
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,8,4095,0.03816800117492676
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,32,4095,0.03867200016975403
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,2,1,64,4095,0.03893760144710541
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,1,8191,0.028172799944877626
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,2,8191,0.024953599274158477
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,4,8191,0.02337760031223297
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,8,8191,0.023051199316978455
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,16,8191,0.025491198897361754
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,32,8191,0.025979200005531312
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,2,1,64,8191,0.026063999533653258
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,1,8191,0.0647823989391327
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,2,8191,0.05804640054702759
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,4,8191,0.054995197057724
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,8,8191,0.05425440073013306
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,16,8191,0.056910401582717894
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,32,8191,0.05642560124397278
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,2,1,64,8191,0.05679360032081604
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,1,16383,0.03938240110874176
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,2,16383,0.03548479974269867
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,4,16383,0.030272001028060914
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,8,16383,0.02821600139141083
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,16,16383,0.029919999837875366
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,32,16383,0.029931199550628663
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,2,1,64,16383,0.030275198817253112
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,1,16383,0.10043200254440307
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,2,16383,0.09721919894218445
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,4,16383,0.09563199877738952
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,8,16383,0.09532160162925721
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,16,16383,0.09751359820365905
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,32,16383,0.09830560088157654
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,2,1,64,16383,0.09812960028648376
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,1,32767,0.055396801233291625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,2,32767,0.05190399885177612
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,4,32767,0.04869920015335083
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,8,32767,0.04844320118427277
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,16,32767,0.049211201071739194
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,8,32767,0.16895519495010375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,32,32767,0.05026559829711914
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,2,1,64,32767,0.05002400279045105
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,1,32767,0.17405920028686522
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,2,32767,0.1710576057434082
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,4,32767,0.16919840574264527
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,16,32767,0.16994240283966064
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,32,32767,0.17189760208129884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,2,1,64,32767,0.17108160257339478
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,1,65535,0.08493279814720153
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,2,65535,0.07917600274085998
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,4,65535,0.07641599774360656
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,8,65535,0.07513120174407958
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,16,65535,0.0756111979484558
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,32,65535,0.07628160119056701
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,2,1,64,65535,0.07619360089302063
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,1,65535,0.31598720550537107
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,2,65535,0.313153600692749
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,4,65535,0.3111471891403198
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,8,65535,0.30964639186859133
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,16,65535,0.31180319786071775
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,32,65535,0.3113759994506836
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,2,1,64,65535,0.3115664005279541
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,1,131071,0.1360592007637024
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,2,131071,0.1305807948112488
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,4,131071,0.12783039808273317
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,8,131071,0.1262719988822937
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,16,131071,0.12742559909820556
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,1,131071,0.5931280136108399
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,2,131071,0.5898655891418457
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,32,131071,0.12739039659500123
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,2,1,64,131071,0.1266335964202881
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,4,131071,0.5854544162750244
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,1,1,0.013120000064373017
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,8,131071,0.5845248222351074
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,2,1,0.012824000418186187
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,16,131071,0.5853328227996826
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,4,1,0.0124719999730587
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,32,131071,0.5851376056671143
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,2,1,64,131071,0.5857359886169433
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,8,1,0.012532800436019897
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,16,1,0.012443199753761292
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,32,1,0.012454400211572647
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,4,1,64,1,0.012510399520397186
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,1,1,0.01980479955673218
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,2,1,0.019236800074577332
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,4,1,0.019049599766731262
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,16,1,0.019091199338436126
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,8,1,0.018935999274253844
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,32,1,0.01912959963083267
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,4,1,64,1,0.019032000005245207
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,1,3,0.01332319974899292
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,2,3,0.012827199697494508
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,4,3,0.012408000230789185
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,8,3,0.012441600114107132
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,16,3,0.012494400143623352
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,32,3,0.012433599680662155
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,4,1,64,3,0.01239359974861145
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,1,3,0.019758400321006776
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,2,3,0.01926079988479614
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,4,3,0.01894560009241104
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,8,3,0.018743999302387238
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,16,3,0.018883199989795686
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,32,3,0.018940800428390504
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,4,1,64,3,0.019100800156593323
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,1,7,0.013240000605583191
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,2,7,0.012800000607967377
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,4,7,0.01268479973077774
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,8,7,0.012383999675512314
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,16,7,0.012436799705028534
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,32,7,0.012503999471664428
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,4,1,64,7,0.01242239996790886
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,1,7,0.01958400011062622
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,2,7,0.019225600361824035
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,4,7,0.019036799669265747
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,8,7,0.018900799751281738
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,16,7,0.018806399405002595
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,32,7,0.018836799263954162
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,4,1,64,7,0.01908479928970337
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,1,15,0.013286399841308593
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,2,15,0.012894399464130402
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,4,15,0.012761600315570831
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,8,15,0.012427199631929398
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,16,15,0.012432000041007996
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,32,15,0.012443199753761292
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,4,1,64,15,0.012571200728416443
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,1,15,0.019619199633598327
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,2,15,0.01934880018234253
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,4,15,0.01914079934358597
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,8,15,0.01886080056428909
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,16,15,0.018915200233459474
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,32,15,0.018916800618171692
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,4,1,64,15,0.019079999625682832
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,1,31,0.013214400410652161
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,2,31,0.012880000472068786
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,4,31,0.013044799864292144
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,8,31,0.012425599992275238
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,16,31,0.012534399330615998
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,32,31,0.012438400089740754
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,4,1,64,31,0.012486399710178375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,1,31,0.01969279944896698
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,2,31,0.019329600036144257
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,4,31,0.019225600361824035
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,8,31,0.018838399648666383
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,16,31,0.018889600038528444
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,32,31,0.0189968004822731
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,4,1,64,31,0.019043199717998505
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,1,63,0.013347199559211731
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,2,63,0.012860800325870513
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,4,63,0.012748800218105316
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,8,63,0.012727999687194824
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,16,63,0.01252480000257492
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,32,63,0.012559999525547028
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,4,1,64,63,0.012636800110340119
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,1,63,0.01987359970808029
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,2,63,0.019480000436306
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,4,63,0.019310399889945984
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,8,63,0.019147199392318726
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,16,63,0.019339199364185333
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,32,63,0.018982400000095368
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,4,1,64,63,0.01913280040025711
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,1,127,0.015566399693489075
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,2,127,0.014667199552059173
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,4,127,0.014382399618625641
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,8,127,0.01438400000333786
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,16,127,0.014399999380111694
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,32,127,0.014528000354766845
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,4,1,64,127,0.014259199798107147
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,1,127,0.021715199947357176
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,2,127,0.021279999613761903
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,4,127,0.020972800254821778
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,8,127,0.02104319930076599
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,16,127,0.0210207998752594
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,32,127,0.02088800072669983
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,4,1,64,127,0.02106720060110092
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,1,255,0.018007999658584593
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,2,255,0.017715199291706084
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,4,255,0.017556799948215483
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,8,255,0.017228800058364867
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,16,255,0.01746080070734024
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,32,255,0.01773280054330826
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,4,1,64,255,0.01744319945573807
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,1,255,0.025342398881912233
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,2,255,0.024480000138282776
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,4,255,0.024532799422740937
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,8,255,0.024531200528144836
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,16,255,0.024553599953651428
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,32,255,0.02438880056142807
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,4,1,64,255,0.024427199363708497
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,1,511,0.019636799395084382
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,2,511,0.018423999845981597
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,4,511,0.01748639941215515
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,8,511,0.017401599884033205
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,16,511,0.018212799727916718
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,32,511,0.018673600256443025
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,4,1,64,511,0.01870719939470291
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,1,511,0.028940799832344054
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,2,511,0.02786880135536194
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,4,511,0.026814401149749756
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,8,511,0.02614240050315857
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,16,511,0.027289599180221558
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,32,511,0.027513599395751952
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,4,1,64,511,0.027454400062561037
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,1,1023,0.02069920003414154
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,2,1023,0.018671999871730804
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,4,1023,0.01790879964828491
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,8,1023,0.017766399681568144
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,16,1023,0.0181551992893219
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,32,1023,0.018926399946212768
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,4,1,64,1023,0.018475200235843658
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,1,1023,0.03237119913101196
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,2,1023,0.030721598863601686
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,4,1023,0.02985920011997223
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,8,1023,0.02959359884262085
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,16,1023,0.030131199955940248
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,32,1023,0.03049600124359131
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,4,1,64,1023,0.030422401428222657
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,1,2047,0.0230320006608963
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,2,2047,0.020532800257205962
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,4,2047,0.019223999977111817
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,8,2047,0.018875199556350707
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,16,2047,0.018915200233459474
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,32,2047,0.01945119947195053
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,4,1,64,2047,0.019497600197792054
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,1,2047,0.04166400134563446
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,2,2047,0.03843039870262146
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,4,2047,0.03730080127716064
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,8,2047,0.036857599020004274
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,16,2047,0.03701280057430267
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,32,2047,0.037227201461791995
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,4,1,64,2047,0.03734880089759827
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,1,4095,0.026686400175094604
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,2,4095,0.023464000225067137
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,4,4095,0.02206239998340607
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,8,4095,0.021775999665260316
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,16,4095,0.022499200701713563
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,32,4095,0.023137600719928743
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,4,1,64,4095,0.022782400250434875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,1,4095,0.06283360123634338
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,2,4095,0.05719360113143921
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,4,4095,0.05470560193061828
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,8,4095,0.052718400955200195
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,16,4095,0.05350559949874878
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,32,4095,0.05339679718017578
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,4,1,64,4095,0.05384640097618103
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,1,8191,0.038332799077034
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,2,8191,0.03394559919834137
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,4,8191,0.028505599498748778
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,8,8191,0.026199999451637267
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,16,8191,0.030158400535583496
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,32,8191,0.030740800499916076
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,4,1,64,8191,0.030151998996734618
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,1,8191,0.0985040009021759
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,2,8191,0.09541760087013244
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,4,8191,0.09446079730987549
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,8,8191,0.09370399713516235
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,4,16383,0.04744159877300262
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,16,8191,0.09789119958877564
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,32,8191,0.09832479953765869
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,4,1,64,8191,0.09795519709587097
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,1,16383,0.055452799797058104
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,2,16383,0.04995200037956238
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,8,16383,0.0460640013217926
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,16,16383,0.049630400538444516
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,32,16383,0.049711999297142026
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,4,1,64,16383,0.05028960108757019
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,1,16383,0.1731503963470459
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,2,16383,0.16893279552459717
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,4,16383,0.1678223967552185
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,2,32767,0.07857120037078857
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,8,16383,0.1675663948059082
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,16,16383,0.17098400592803956
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,32,16383,0.17092000246047973
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,4,1,64,16383,0.17084319591522218
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,1,32767,0.0835968017578125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,4,32767,0.07509279847145081
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,8,32767,0.07365440130233765
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,16,32767,0.07386400103569031
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,32,32767,0.07470399737358094
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,4,1,64,32767,0.07634080052375794
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,1,32767,0.31444320678710935
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,2,32767,0.31098721027374265
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,4,32767,0.31024000644683836
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,8,32767,0.3071696043014526
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,16,32767,0.3110575914382935
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,32,32767,0.3105679988861084
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,4,1,64,32767,0.31090240478515624
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,16,65535,0.1244320034980774
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,1,65535,0.1342144012451172
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,2,65535,0.12831679582595826
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,4,65535,0.12479519844055176
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,8,65535,0.1239967942237854
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,32,65535,0.12685760259628295
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,4,1,64,65535,0.12684160470962524
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,1,65535,0.5902495861053467
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,2,65535,0.585532808303833
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,1,131071,0.23589599132537842
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,16,65535,0.5845088005065918
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,8,65535,0.5815855979919433
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,4,65535,0.5854559898376465
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,32,65535,0.5876944065093994
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,4,1,64,65535,0.5843616008758545
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,2,131071,0.2304975986480713
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,4,131071,0.2271888017654419
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,8,131071,0.22638239860534667
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,16,131071,0.2277440071105957
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,32,131071,0.2281872034072876
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,4,1,64,131071,0.2282304048538208
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,1,131071,1.1380592346191407
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,2,131071,1.1341567993164063
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,4,131071,1.1313183784484864
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,1,1,0.013318400084972381
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,2,1,0.012868799269199371
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,8,131071,1.1244095802307128
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,16,131071,1.13220157623291
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,4,1,0.012508800625801087
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,8,1,0.01252640038728714
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,32,131071,1.1330975532531737
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,16,1,0.01252799928188324
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,4,1,64,131071,1.1321855545043946
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,32,1,0.012374400347471236
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,8,1,64,1,0.012596799433231354
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,16,1,0.019280000030994414
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,1,1,0.019793599843978882
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,2,1,0.019704000651836397
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,8,1,0.019415999948978423
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,4,1,0.01950400024652481
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,32,1,0.01908479928970337
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,8,1,64,1,0.019166399538517
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,1,3,0.01329759955406189
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,2,3,0.012740799784660339
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,4,3,0.01271360069513321
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,8,3,0.012697599828243256
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,16,3,0.012604799866676331
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,32,3,0.01250080019235611
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,8,1,64,3,0.012428800016641617
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,1,3,0.02007199972867966
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,2,3,0.019392000138759614
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,4,3,0.019289599359035493
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,8,3,0.01918559968471527
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,16,3,0.019227199256420135
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,32,3,0.019204799830913544
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,32,7,0.012598399817943574
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,8,1,64,3,0.01929280012845993
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,1,7,0.013344000279903411
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,2,7,0.012787200510501862
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,4,7,0.01271360069513321
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,8,7,0.012603199481964112
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,16,7,0.019273599982261656
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,16,7,0.012433599680662155
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,32,7,0.01926400065422058
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,8,1,64,7,0.012608000636100769
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,1,7,0.020024000108242034
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,2,7,0.019513599574565887
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,4,7,0.019424000382423402
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,8,7,0.018991999328136444
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,8,1,64,7,0.019280000030994414
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,1,15,0.013342399895191193
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,2,15,0.013068799674510957
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,1,15,0.020000000298023225
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,4,15,0.012759999930858612
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,8,15,0.012569600343704223
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,16,15,0.012761600315570831
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,32,15,0.012697599828243256
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,8,1,64,15,0.012503999471664428
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,2,15,0.01939679980278015
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,4,15,0.01951040029525757
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,8,15,0.019283199310302736
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,16,15,0.01931679993867874
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,32,15,0.019380800426006317
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,8,1,64,15,0.01927199959754944
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,1,31,0.013412800431251527
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,2,31,0.01287200003862381
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,4,31,0.01263359934091568
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,8,31,0.012680000066757202
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,16,31,0.01266240030527115
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,32,31,0.012600000202655792
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,8,1,64,31,0.012675200402736665
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,1,31,0.02003519982099533
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,2,31,0.01956160068511963
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,4,31,0.019305600225925444
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,8,31,0.019067199528217317
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,16,31,0.019475199282169342
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,32,31,0.019351999461650848
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,8,1,64,31,0.019361600279808044
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,1,63,0.013558399677276612
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,2,63,0.01318880021572113
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,4,63,0.012708799540996551
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,8,63,0.012595200538635254
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,32,63,0.012827199697494508
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,16,63,0.012854400277137756
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,8,1,64,63,0.012878400087356568
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,1,63,0.02046239972114563
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,2,63,0.019942399859428406
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,4,63,0.019732800126075745
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,8,63,0.01950239986181259
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,16,63,0.019407999515533448
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,32,63,0.01966720074415207
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,8,1,64,63,0.019500799477100372
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,1,127,0.015249599516391755
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,2,127,0.01480640023946762
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,4,127,0.01454080045223236
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,8,127,0.014431999623775482
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,16,127,0.014316800236701965
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,32,127,0.014345599710941315
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,1,127,0.02234079986810684
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,8,1,64,127,0.014507199823856353
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,2,127,0.02200320065021515
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,4,127,0.02178879976272583
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,8,127,0.021614399552345277
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,1,255,0.018454399704933167
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,32,127,0.021515199542045595
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,16,127,0.021401600539684297
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,8,1,64,127,0.021542400121688843
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,2,255,0.018080000579357148
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,16,255,0.017716799676418305
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,4,255,0.017668800055980684
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,8,255,0.017566399276256563
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,32,255,0.017534400522708892
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,8,1,64,255,0.017459200322628023
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,1,255,0.02720159888267517
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,4,255,0.02659519910812378
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,2,255,0.02682720124721527
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,8,255,0.026545599102973938
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,16,255,0.02643359899520874
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,32,255,0.026292800903320312
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,8,1,64,255,0.02622080147266388
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,1,511,0.021108800172805788
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,2,511,0.018702399730682374
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,4,511,0.018324799835681915
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,8,511,0.01764480024576187
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,16,511,0.018670399487018586
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,32,511,0.01873439997434616
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,8,1,64,511,0.01881919950246811
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,1,511,0.03254559934139252
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,2,511,0.030931198596954347
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,4,511,0.030079999566078187
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,16,511,0.030908799171447753
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,2,1023,0.020076799392700195
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,8,1,64,511,0.030880001187324525
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,8,1023,0.01860000044107437
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,32,511,0.030659198760986328
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,8,511,0.029764801263809204
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,1,1023,0.023574399948120116
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,4,1023,0.019174399971961974
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,4,1023,0.03708640038967133
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,16,1023,0.019465599954128266
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,32,1023,0.01961439996957779
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,8,1,64,1023,0.019441600143909454
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,1,1023,0.04090079963207245
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,2,1023,0.038180801272392276
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,8,1023,0.03668160140514374
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,16,1023,0.037150400876998904
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,32,1023,0.03765600025653839
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,8,1,64,1023,0.0378495991230011
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,1,2047,0.02528800070285797
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,8,1,64,2047,0.021991999447345735
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,2,2047,0.02258560061454773
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,4,2047,0.021110400557518005
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,8,2047,0.021107199788093566
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,16,2047,0.021459199488162994
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,32,2047,0.02173279970884323
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,1,2047,0.06309599876403808
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,2,2047,0.05617920160293579
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,4,2047,0.053487998247146604
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,8,2047,0.05267040133476257
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,16,2047,0.0529744029045105
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,32,2047,0.05261759757995606
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,8,1,64,2047,0.05268639922142029
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,1,4095,0.03730080127716064
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,2,4095,0.03323040008544922
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,4,4095,0.02738080024719238
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,8,4095,0.02577120065689087
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,16,4095,0.027108800411224366
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,32,4095,0.027188798785209654
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,8,1,64,4095,0.02719680070877075
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,32,4095,0.09558719992637635
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,1,4095,0.09829760193824769
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,2,4095,0.09440960288047791
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,4,4095,0.09396160244941712
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,8,4095,0.09286080002784729
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,16,4095,0.0948032021522522
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,8,1,64,4095,0.0950223982334137
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,2,8191,0.04868319928646088
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,1,8191,0.054071998596191405
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,4,8191,0.04720160067081451
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,8,8191,0.045291200280189514
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,16,8191,0.049537599086761475
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,32,8191,0.04979679882526398
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,8,1,64,8191,0.04996959865093231
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,1,8191,0.1723407983779907
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,2,8191,0.16832480430603028
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,8,1,64,8191,0.17044800519943237
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,32,8191,0.1712496042251587
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,8,8191,0.16595200300216675
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,4,8191,0.16686079502105713
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,4,16383,0.07350559830665589
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,32,16383,0.07620959877967834
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,1,16383,0.08435999751091003
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,2,16383,0.0764623999595642
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,16,8191,0.16972320079803466
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,2,16383,0.3090111970901489
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,8,16383,0.07235360145568848
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,16,16383,0.0759663999080658
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,8,1,64,16383,0.07679839730262757
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,1,16383,0.31380319595336914
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,16,16383,0.31020159721374513
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,4,16383,0.30750720500946044
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,8,16383,0.306113600730896
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,32,16383,0.31096160411834717
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,1,32767,0.13337440490722657
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,8,1,64,16383,0.31239519119262693
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,2,32767,0.1276479959487915
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,4,32767,0.12408000230789185
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,8,32767,0.12289279699325562
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,16,32767,0.1266319990158081
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,32,32767,0.12710399627685548
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,1,32767,0.5918831825256348
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,8,1,64,32767,0.12631839513778687
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,2,32767,0.5845903873443603
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,4,32767,0.5823872089385986
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,8,32767,0.5809967994689942
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,32,32767,0.5857855796813964
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,16,32767,0.5854000091552735
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,1,65535,0.23565919399261476
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,8,1,64,32767,0.5872848033905029
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,2,65535,0.2246016025543213
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,4,65535,0.2260143995285034
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,8,65535,0.22444479465484618
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,16,65535,0.22793760299682617
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,32,65535,0.22946879863739014
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,8,1,64,65535,0.2286223888397217
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,1,65535,1.1415023803710938
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,2,65535,1.1321999549865722
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,4,65535,1.1305600166320802
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,8,65535,1.1287455558776855
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,16,65535,1.133471965789795
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,32,65535,1.133664035797119
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,8,1,64,65535,1.1339391708374023
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,2,131071,0.4315743923187256
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,1,131071,0.4381375789642334
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,8,131071,0.42839679718017576
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,4,131071,0.4294608116149902
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,8,1,64,131071,0.43143677711486816
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,32,131071,0.4316127777099609
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,16,131071,0.4313663959503174
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,1,131071,2.2301776885986326
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,2,131071,2.235905647277832
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,1,1,0.014287999272346497
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,4,131071,2.224723243713379
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,8,131071,2.224496078491211
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,2,1,0.013195200264453888
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,8,1,0.012807999551296235
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,4,1,0.012950399518013
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,16,131071,2.2196672439575194
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,32,131071,2.223873519897461
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,8,1,64,131071,2.228191947937012
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,16,1,0.012668800354003907
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,32,1,0.012702399492263794
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,16,1,64,1,0.012678399682044983
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,1,1,0.01995519995689392
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,2,1,0.019512000679969787
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,4,1,0.0191648006439209
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,8,1,0.019256000220775605
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,16,1,0.01919199973344803
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,32,1,0.019308799505233766
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,16,1,64,1,0.019198399782180787
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,1,3,0.01366720050573349
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,2,3,0.012982399761676788
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,4,3,0.012838399410247803
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,8,3,0.012665599584579468
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,16,3,0.012727999687194824
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,32,3,0.012636800110340119
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,16,1,64,3,0.01268479973077774
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,2,3,0.019480000436306
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,16,1,64,3,0.019152000546455383
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,4,3,0.019472000002861024
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,1,3,0.01995840072631836
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,8,3,0.01918399930000305
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,16,3,0.01903519928455353
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,32,3,0.01926079988479614
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,1,7,0.013518400490283966
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,4,7,0.012756800651550293
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,2,7,0.013148799538612366
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,8,7,0.01268640011548996
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,16,7,0.012870399653911591
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,32,7,0.01271360069513321
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,16,1,64,7,0.012587200105190276
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,1,7,0.019977599382400513
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,1,15,0.01361600011587143
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,2,7,0.019728000462055206
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,4,7,0.01919520050287247
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,8,7,0.019227199256420135
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,16,7,0.01913599967956543
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,32,7,0.019393600523471832
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,16,1,64,7,0.019289599359035493
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,2,15,0.012987199425697326
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,4,15,0.012814399600028992
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,8,15,0.012931199371814727
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,16,15,0.012707200646400452
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,32,15,0.01292639970779419
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,16,1,64,15,0.012771199643611907
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,1,15,0.021137599647045136
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,2,15,0.01973759979009628
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,4,15,0.019281600415706635
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,8,15,0.019334399700164796
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,16,15,0.019299200177192687
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,32,15,0.019305600225925444
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,16,1,64,15,0.01930239945650101
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,1,31,0.013651199638843536
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,2,31,0.013488000631332398
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,4,31,0.013100799918174744
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,8,31,0.012723200023174286
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,16,31,0.012966400384902954
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,32,31,0.01281599998474121
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,16,1,64,31,0.0130048006772995
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,1,31,0.020479999482631683
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,16,31,0.019574399292469024
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,8,31,0.019364799559116363
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,2,31,0.019758400321006776
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,4,31,0.01968639940023422
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,32,31,0.019308799505233766
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,16,1,64,31,0.019470399618148802
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,1,63,0.015462400019168853
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,2,63,0.01342719942331314
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,4,63,0.013483199477195739
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,8,63,0.013075199723243714
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,16,63,0.012779200077056884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,32,63,0.01276479959487915
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,16,1,64,63,0.012873600423336028
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,1,63,0.020929600298404693
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,2,63,0.02021919935941696
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,4,63,0.019976000487804412
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,8,63,0.02003999948501587
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,16,63,0.01988160014152527
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,32,63,0.02038400024175644
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,16,1,64,63,0.019870400428771973
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,1,127,0.017030400037765504
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,2,127,0.015254400670528412
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,4,127,0.01478240042924881
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,8,127,0.01480959951877594
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,16,127,0.014790399372577668
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,32,127,0.014528000354766845
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,16,1,64,127,0.014612799882888794
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,1,127,0.024374400079250336
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,2,127,0.023819200694561005
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,4,127,0.023475199937820435
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,8,127,0.023372800648212434
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,16,127,0.023255999386310577
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,32,127,0.023187200725078582
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,16,1,64,127,0.023350399732589722
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,1,255,0.018756799399852753
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,2,255,0.018028800189495087
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,4,255,0.01777919977903366
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,16,255,0.01754080057144165
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,32,255,0.01757279932498932
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,8,255,0.01772480010986328
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,16,1,64,255,0.017884799838066102
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,16,1,64,255,0.029135999083518983
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,2,255,0.0297760009765625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,1,255,0.030460798740386964
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,4,255,0.02916960120201111
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,8,255,0.029071998596191407
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,16,255,0.02926880121231079
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,32,255,0.02911520004272461
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,1,511,0.02353599965572357
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,2,511,0.020051200687885285
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,4,511,0.01924320012331009
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,8,511,0.018751999735832213
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,16,511,0.02003999948501587
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,32,511,0.01937119960784912
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,16,1,64,511,0.019963200390338897
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,2,511,0.03821280002593994
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,4,511,0.03696480095386505
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,8,511,0.03661920130252838
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,1,511,0.0423088014125824
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,32,511,0.037411201000213626
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,16,511,0.0378928005695343
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,16,1,64,511,0.03764640092849732
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,2,1023,0.021878400444984437
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,4,1023,0.02051839977502823
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,1,1023,0.027051201462745665
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,8,1023,0.02038239985704422
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,32,1023,0.021353599429130555
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,16,1023,0.021382400393486024
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,16,1,64,1023,0.02157119959592819
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,1,1023,0.0629696011543274
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,16,1023,0.05260000228881836
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,2,1023,0.056775999069213864
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,32,1023,0.05249760150909424
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,8,1023,0.052153599262237546
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,4,1023,0.05287200212478638
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,16,1,64,1023,0.052104002237319945
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,2,2047,0.03331040143966675
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,4,2047,0.029091200232505797
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,1,2047,0.03850559890270233
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,16,2047,0.025577598810195924
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,8,2047,0.025313600897789
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,32,2047,0.02600640058517456
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,16,1,64,2047,0.025916799902915955
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,2,2047,0.09449920058250427
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,1,2047,0.0991648018360138
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,4,2047,0.09330880045890808
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,8,2047,0.09308000206947327
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,32,2047,0.09363200068473816
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,16,2047,0.09379199743270875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,16,1,64,2047,0.0937936007976532
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,4,4095,0.04563679993152618
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,2,4095,0.04818240106105805
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,1,4095,0.05458239912986755
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,8,4095,0.04448800086975098
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,16,4095,0.04604479968547821
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,2,4095,0.16707520484924315
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,32,4095,0.04649919867515564
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,16,1,64,4095,0.04664640128612518
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,1,4095,0.17252320051193237
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,4,4095,0.1650720000267029
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,8,4095,0.1650928020477295
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,16,4095,0.1664720058441162
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,32,4095,0.1669312000274658
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,16,1,64,4095,0.16766239404678346
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,1,8191,0.08039839863777161
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,2,8191,0.0741599977016449
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,4,8191,0.0709168016910553
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,8,8191,0.06988160014152527
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,32,8191,0.0741599977016449
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,16,8191,0.07443199753761291
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,16,1,64,8191,0.07451679706573486
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,2,8191,0.30752480030059814
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,1,8191,0.3122976064682007
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,4,8191,0.3051568031311035
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,8,8191,0.3058255910873413
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,32,8191,0.30949599742889405
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,2,16383,0.1239967942237854
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,16,8191,0.30870399475097654
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,1,16383,0.12984639406204224
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,4,16383,0.12103999853134155
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,16,1,64,8191,0.31103200912475587
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,8,16383,0.11987999677658082
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,16,16383,0.1239408016204834
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,32,16383,0.12470400333404541
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,16,1,64,16383,0.12456640005111694
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,1,16383,0.5866159915924072
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,2,16383,0.5807231903076172
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,4,16383,0.5807807922363282
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,8,16383,0.580511999130249
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,16,16383,0.5826496124267578
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,32,16383,0.5838511943817138
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,1,32767,0.2309328079223633
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,16,1,64,16383,0.5832176208496094
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,2,32767,0.22396960258483886
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,4,32767,0.2202608108520508
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,8,32767,0.2195568084716797
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,16,32767,0.22260639667510987
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,32,32767,0.2237391948699951
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,16,1,64,32767,0.22367680072784424
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,1,32767,1.1317919731140136
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,2,32767,1.126908779144287
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,4,32767,1.1247360229492187
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,8,32767,1.1237711906433105
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,16,32767,1.1288000106811524
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,1,65535,0.4303743839263916
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,32,32767,1.1272543907165526
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,2,65535,0.424289608001709
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,16,1,64,32767,1.1290927886962892
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,4,65535,0.41992640495300293
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,8,65535,0.41885762214660643
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,16,65535,0.42267518043518065
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,16,1,64,65535,0.42206878662109376
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,32,65535,0.4236623764038086
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,1,65535,2.222115135192871
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,2,65535,2.2154048919677733
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,1,131071,0.827126407623291
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,4,65535,2.216935920715332
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,8,65535,2.2134815216064454
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,16,65535,2.2106111526489256
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,32,65535,2.226478385925293
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,16,1,64,65535,2.2188207626342775
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,2,131071,0.8195391654968261
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,4,131071,0.8160783767700195
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,8,131071,0.814356803894043
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,16,131071,0.8180080413818359
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,32,131071,0.8179295539855957
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,16,1,64,131071,0.8169584274291992
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,1,1,0.015427200496196747
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,2,1,0.014105600118637086
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,4,1,0.01361600011587143
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,8,1,0.013310399651527405
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,16,1,0.013441599905490875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,32,1,0.013398399949073792
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,32,1,64,1,0.013321599364280701
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,1,1,0.021414400637149812
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,2,1,0.020190399885177613
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,1,131071,4.4056846618652346
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,2,131071,4.396385574340821
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,4,1,0.019704000651836397
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,8,1,0.019620800018310548
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,4,131071,4.3959297180175785
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,8,131071,4.36497917175293
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,32,1,0.019555200636386872
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,16,1,0.01977760046720505
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,16,131071,4.383910369873047
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,32,131071,4.400054550170898
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,32,1,64,1,0.019644799828529357
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,16,1,64,131071,4.384478378295898
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,1,3,0.015031999349594117
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,2,3,0.013672000169754029
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,4,3,0.013491199910640716
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,8,3,0.013228799402713775
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,16,3,0.013699199259281158
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,32,3,0.013094399869441987
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,32,1,64,3,0.013299199938774108
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,1,3,0.021540799736976625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,2,3,0.02020000070333481
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,4,3,0.019836799800395967
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,8,3,0.019889600574970245
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,16,3,0.02028000056743622
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,32,3,0.019827200472354888
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,32,1,64,3,0.020025600492954255
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,32,7,0.01327040046453476
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,1,7,0.01525920033454895
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,2,7,0.013809600472450256
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,4,7,0.013471999764442444
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,8,7,0.013376000523567199
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,8,7,0.02014240026473999
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,16,7,0.013736000657081604
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,32,1,64,7,0.013359999656677246
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,1,7,0.022060799598693847
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,2,7,0.020351999998092653
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,4,7,0.019988800585269927
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,16,7,0.020235200226306916
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,32,7,0.020230400562286376
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,32,1,64,7,0.020164799690246583
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,1,15,0.014912000298500061
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,2,15,0.013787199556827546
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,4,15,0.013900800049304963
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,8,15,0.013465599715709686
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,16,15,0.013398399949073792
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,32,15,0.013764800131320953
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,32,1,64,15,0.013683199882507324
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,1,15,0.02208320051431656
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,2,15,0.02033279985189438
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,4,15,0.020351999998092653
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,8,15,0.02040800005197525
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,16,15,0.02005600035190582
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,32,15,0.019937600195407867
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,32,1,64,15,0.02032800018787384
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,1,31,0.015340800583362579
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,2,31,0.014023999869823455
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,4,31,0.013607999682426453
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,8,31,0.013488000631332398
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,16,31,0.013660800457000733
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,32,31,0.013568000495433807
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,32,1,64,31,0.013441599905490875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,1,31,0.022489599883556366
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,2,31,0.020982399582862854
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,4,31,0.020398400723934174
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,8,31,0.020230400562286376
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,16,31,0.02056799978017807
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,32,31,0.020876799523830415
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,32,1,64,31,0.020611199736595153
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,1,63,0.016172799468040466
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,2,63,0.014505599439144135
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,4,63,0.014345599710941315
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,8,63,0.013926400244235993
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,8,63,0.02274720072746277
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,16,63,0.013753600418567657
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,32,63,0.014108799397945404
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,32,1,64,63,0.014071999490261078
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,1,63,0.024172799289226533
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,2,63,0.023080000281333925
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,4,63,0.02284640073776245
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,16,63,0.02296479940414429
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,32,63,0.02279680073261261
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,32,1,64,63,0.0227183997631073
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,1,127,0.017398400604724883
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,2,127,0.016070400178432465
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,4,127,0.015907199680805208
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,8,127,0.01549919992685318
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,16,127,0.01573439985513687
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,32,127,0.016143999993801117
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,32,1,64,127,0.01600639969110489
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,1,127,0.02868640124797821
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,2,127,0.027697598934173583
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,4,127,0.027219200134277345
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,8,127,0.02709920108318329
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,16,127,0.02705279886722565
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,32,127,0.027160000801086426
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,32,1,64,127,0.027140799164772033
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,32,1,64,255,0.01903039962053299
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,1,255,0.02067359983921051
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,2,255,0.019208000600337984
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,4,255,0.019099199771881105
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,8,255,0.018782399594783783
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,16,255,0.018670399487018586
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,32,255,0.018755200505256652
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,1,255,0.03779360055923462
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,2,255,0.036374399065971376
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,4,255,0.03608959913253784
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,4,511,0.02128800004720688
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,8,255,0.035939198732376096
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,16,255,0.03598560094833374
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,32,255,0.03593280017375946
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,32,511,0.021963199973106383
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,32,1,64,255,0.036022400856018065
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,1,511,0.02985759973526001
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,2,511,0.023852799832820893
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,8,511,0.02183839976787567
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,16,511,0.022566400468349457
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,32,1,64,511,0.021699200570583343
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,1,511,0.06591200232505798
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,2,511,0.058931201696395874
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,4,511,0.053985601663589476
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,8,511,0.052795201539993286
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,16,511,0.05351520180702209
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,32,511,0.05375199913978577
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,32,1,64,511,0.05260800123214722
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,1,1023,0.04195519983768463
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,2,1023,0.034561601281166074
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,4,1023,0.030344000458717345
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,1,1023,0.10199840068817138
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,8,1023,0.024779200553894043
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,16,1023,0.024665600061416625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,32,1023,0.025094398856163026
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,32,1,64,1023,0.025011199712753295
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,32,1023,0.09327679872512817
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,2,1023,0.09492160081863403
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,4,1023,0.09340959787368774
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,8,1023,0.09272480010986328
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,16,1023,0.09324799776077271
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,32,1,64,1023,0.09308480024337769
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,1,2047,0.05724319815635681
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,2,2047,0.04907360076904297
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,4,2047,0.04619520008563995
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,8,2047,0.044784000515937804
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,16,2047,0.04549280107021332
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,32,2047,0.045579200983047484
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,32,1,64,2047,0.045300799608230594
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,1,2047,0.17477600574493407
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,2,2047,0.16827679872512818
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,4,2047,0.16562720537185668
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,8,2047,0.16518559455871581
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,16,2047,0.16685279607772827
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,32,2047,0.16619679927825928
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,32,1,64,2047,0.1659168004989624
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,1,4095,0.08294720053672791
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,2,4095,0.07482560276985169
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,4,4095,0.07175679802894593
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,8,4095,0.06971840262413025
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,2,4095,0.3082240104675293
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,16,4095,0.07130879759788514
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,32,4095,0.07229120135307313
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,32,1,64,4095,0.07149760127067566
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,1,4095,0.31413280963897705
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,32,1,64,4095,0.3068671941757202
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,4,4095,0.3059664011001587
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,8,4095,0.30471200942993165
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,16,4095,0.3064079999923706
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,32,4095,0.3072832107543945
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,1,8191,0.13305599689483644
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,2,8191,0.1251871943473816
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,4,8191,0.12138079404830933
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,8,8191,0.1202191948890686
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,1,8191,0.5887919902801514
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,16,8191,0.12434719800949097
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,32,8191,0.1255728006362915
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,32,1,64,8191,0.1250208020210266
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,2,8191,0.5840847969055176
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,4,8191,0.581270408630371
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,8,8191,0.5791071891784668
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,16,8191,0.5832399845123291
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,32,8191,0.5840799808502197
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,32,1,64,8191,0.5840240001678467
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,1,16383,0.23331201076507568
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,2,16383,0.22502560615539552
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,32,1,64,16383,0.2239759922027588
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,4,16383,0.2214224100112915
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,8,16383,0.21871678829193114
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,16,16383,0.22387359142303467
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,32,16383,0.22427198886871338
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,1,16383,1.1345328330993651
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,2,16383,1.1291168212890625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,4,16383,1.1249967575073243
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,8,16383,1.1235343933105468
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,16,16383,1.1270784378051757
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,32,16383,1.1327936172485351
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,1,32767,0.4332831859588623
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,2,32767,0.42529120445251467
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,4,32767,0.4212063789367676
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,32,1,64,16383,1.1291152000427247
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,8,32767,0.41842241287231446
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,16,32767,0.4222080230712891
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,32,32767,0.4230912208557129
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,32,1,64,32767,0.422486400604248
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,1,32767,2.2188720703125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,2,32767,2.2198272705078126
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,1,65535,0.8290911674499511
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,4,32767,2.211270332336426
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,8,32767,2.2152288436889647
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,16,32767,2.2150943756103514
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,32,32767,2.2189632415771485
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,32,1,64,32767,2.2169376373291017
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,2,65535,0.8198127746582031
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,4,65535,0.8153087615966796
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,8,65535,0.813481616973877
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,16,65535,0.8195535659790039
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,32,65535,0.8176848411560058
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,32,1,64,65535,0.8179471969604493
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,1,65535,4.407024002075195
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,2,65535,4.386905670166016
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,4,65535,4.358816146850586
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,8,65535,4.389384078979492
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,1,1,0.015855999290943147
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,16,65535,4.384692764282226
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,2,1,0.014291200041770934
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,32,65535,4.385137557983398
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,32,1,64,65535,4.370719909667969
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,4,1,0.014281600713729858
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,16,1,0.014110399782657624
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,8,1,0.014280000329017639
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,64,1,64,1,0.014392000436782838
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,32,1,0.013990400731563568
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,1,1,0.022580799460411072
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,2,1,0.02101600021123886
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,4,1,0.020950399339199066
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,8,1,0.0205375999212265
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,16,1,0.020686399936676026
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,32,1,0.020556800067424774
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,64,1,64,1,0.020960000157356263
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,1,3,0.015641599893569946
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,2,3,0.014644800126552582
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,4,3,0.014668799936771393
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,8,3,0.013972799479961395
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,16,3,0.014204800128936768
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,32,3,0.013920000195503235
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,64,1,64,3,0.014256000518798828
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,1,3,0.022700800001621245
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,2,3,0.021191999316215515
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,4,3,0.020776000618934632
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,8,3,0.021196800470352172
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,16,3,0.02089280039072037
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,32,3,0.02075839936733246
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,64,1,64,3,0.020609599351882935
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,1,7,0.015779200196266174
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,2,7,0.014867199957370758
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,4,7,0.014412799477577209
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,8,7,0.014236800372600555
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,16,7,0.014324800670146942
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,32,7,0.014207999408245086
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,64,1,64,7,0.014616000652313232
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,1,7,0.022312000393867493
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,2,7,0.021273599565029146
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,4,7,0.021073600649833678
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,4,15,0.01478240042924881
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,8,7,0.02091200053691864
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,16,7,0.02099359929561615
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,32,7,0.021187199652194975
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,64,1,64,7,0.02115679979324341
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,1,15,0.015872000157833098
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,2,15,0.014827199280261993
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,8,15,0.014419199526309967
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,16,15,0.014387199282646179
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,32,15,0.014606399834156037
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,64,1,64,15,0.014523200690746307
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,1,15,0.022681599855422972
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,2,15,0.021590399742126464
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,2,31,0.015044799447059632
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,4,15,0.021748800575733186
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,8,15,0.021422399580478667
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,16,15,0.02131199985742569
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,32,15,0.021649600565433504
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,64,1,64,15,0.021524800360202788
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,1,31,0.016307200491428375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,4,31,0.014382399618625641
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,8,31,0.014788800477981567
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,16,31,0.014265599846839904
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,32,31,0.014375999569892883
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,64,1,64,31,0.014529600739479065
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,1,31,0.0248879998922348
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,2,31,0.02370239943265915
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,4,31,0.023643200099468232
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,8,31,0.02319840043783188
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,16,31,0.023316800594329834
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,32,31,0.02348479926586151
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,64,1,64,31,0.02343519926071167
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,1,63,0.016527999937534333
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,2,63,0.01534239947795868
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,4,63,0.014878399670124054
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,8,63,0.015491199493408204
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,16,63,0.014856000244617463
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,32,63,0.014923200011253357
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,64,1,64,63,0.015433600544929505
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,1,63,0.028118398785591126
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,1,127,0.018182399868965148
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,2,63,0.026956799626350402
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,4,63,0.026684799790382387
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,8,63,0.026668798923492432
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,16,63,0.026451200246810913
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,32,63,0.026649600267410277
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,64,1,64,63,0.026761600375175477
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,2,127,0.017100800573825837
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,4,127,0.017187200486660004
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,8,127,0.016921600699424742
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,16,127,0.016497600078582763
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,32,127,0.017047999799251555
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,64,1,64,127,0.017105600237846373
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,1,127,0.03664000034332275
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,2,127,0.034811198711395264
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,4,127,0.034078401327133176
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,8,127,0.03379679918289184
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,16,127,0.033795198798179625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,32,127,0.03411999940872192
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,64,1,64,127,0.034041601419448855
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,1,255,0.02237759977579117
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,2,255,0.020678399503231047
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,4,255,0.020604799687862396
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,8,255,0.019913600385189058
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,16,255,0.02011840045452118
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,32,255,0.01979999989271164
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,64,1,64,255,0.020024000108242034
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,1,255,0.05604640245437622
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,2,255,0.05234079957008362
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,4,255,0.0513264000415802
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,8,255,0.05029600262641907
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,16,255,0.05011039972305298
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,4,511,0.030534398555755616
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,32,255,0.049825599789619444
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,64,1,64,255,0.050457602739334105
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,1,511,0.045952001214027406
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,2,511,0.0368800014257431
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,8,511,0.02764959931373596
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,16,511,0.025726398825645445
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,32,511,0.027161601185798644
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,64,1,64,511,0.02672159969806671
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,1,511,0.10599679946899414
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,2,511,0.09785119891166687
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,4,511,0.09452800154685974
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,8,511,0.09261279702186584
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,16,511,0.09448639750480652
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,32,511,0.09543359875679017
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,64,1,64,511,0.09485759735107421
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,1,1023,0.06115679740905762
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,2,1023,0.05185440182685852
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,4,1023,0.047295999526977536
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,8,1023,0.045230400562286374
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,16,1023,0.04642559885978699
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,32,1023,0.046854400634765626
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,64,1,64,1023,0.04681600034236908
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,1,1023,0.17860480546951293
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,2,1023,0.17150239944458007
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,4,1023,0.16827199459075928
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,8,1023,0.16511679887771608
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,16,1023,0.16739519834518432
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,32,1023,0.16790879964828492
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,64,1,64,1023,0.1686576008796692
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,1,2047,0.08640959858894348
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,2,2047,0.07765759825706482
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,4,2047,0.07276480197906494
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,8,2047,0.07072319984436035
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,2,2047,0.31069440841674806
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,16,2047,0.07084320187568664
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,32,2047,0.07145599722862243
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,64,1,64,2047,0.0719215989112854
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,1,2047,0.31926560401916504
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,4,2047,0.3074624061584473
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,8,2047,0.3068687915802002
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,16,2047,0.3073024034500122
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,32,2047,0.3073312044143677
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,64,1,64,2047,0.30739679336547854
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,1,4095,0.13752959966659545
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,2,4095,0.12747199535369874
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,4,4095,0.12271360158920289
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,8,4095,0.12052479982376099
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,16,4095,0.12312159538269044
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,32,4095,0.12512799501419067
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,64,1,64,4095,0.12490880489349365
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,1,4095,0.5950831890106201
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,2,4095,0.5861775875091553
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,4,4095,0.5803408145904541
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,8,4095,0.5808127880096435
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,16,4095,0.5818031787872314
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,32,4095,0.5861248016357422
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,64,1,64,4095,0.5858384132385254
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,1,8191,0.23807199001312257
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,2,8191,0.2286223888397217
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,4,8191,0.22208640575408936
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,8,8191,0.21914560794830323
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,16,8191,0.22523679733276367
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,32,8191,0.2285151958465576
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,64,1,64,8191,0.2283695936203003
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,1,8191,1.143785572052002
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,2,8191,1.1295023918151856
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,4,8191,1.1306143760681153
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,8,8191,1.128433609008789
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,16,8191,1.1303520202636719
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,32,8191,1.1387727737426758
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,1,16383,0.4374991893768311
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,64,1,64,8191,1.1348015785217285
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,2,16383,0.42755999565124514
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,4,16383,0.4214335918426514
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,8,16383,0.4178959846496582
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,16,16383,0.42358078956604006
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,32,16383,0.42763681411743165
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,64,1,64,16383,0.42812161445617675
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,1,16383,2.225484848022461
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,2,16383,2.2222543716430665
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,4,16383,2.210049629211426
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,8,16383,2.213091278076172
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,16,16383,2.2134143829345705
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,32,16383,2.224577522277832
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,64,1,64,16383,2.224124717712402
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,1,32767,0.8340512275695801
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,2,32767,0.8242015838623047
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,4,32767,0.8173007965087891
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,8,32767,0.8139328002929688
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,16,32767,0.8187264442443848
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,32,32767,0.8233327865600586
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,64,1,64,32767,0.8230768203735351
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,1,1,0.018035200238227845
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,2,1,0.01648640036582947
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,4,1,0.015827199816703795
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,1,32767,4.388449478149414
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,2,32767,4.403047943115235
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,8,1,0.015539200603961944
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,4,32767,4.382724761962891
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,8,32767,4.377779388427735
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,128,1,64,1,0.0156016007065773
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,16,32767,4.386393737792969
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,2,1,0.02268480062484741
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,16,1,0.01581120043992996
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,32,32767,4.406220626831055
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,32,1,0.01584160029888153
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,1,1,0.024668799340724946
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,64,1,64,32767,4.391635131835938
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,4,1,0.022542400658130644
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,16,1,0.022763200104236603
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,8,1,0.022806400060653688
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,32,1,0.02237440049648285
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,128,1,64,1,0.022808000445365906
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,1,3,0.018267199397087097
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,2,3,0.016011199355125426
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,4,3,0.0159183993935585
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,8,3,0.015876799821853638
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,16,3,0.015872000157833098
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,32,3,0.015694400668144225
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,128,1,64,3,0.016073599457740784
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,1,3,0.02505599856376648
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,2,3,0.023321600258350374
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,4,3,0.022567999362945557
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,8,3,0.022249600291252135
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,16,3,0.022873599827289582
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,32,3,0.022598400712013245
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,128,1,64,3,0.022439999878406523
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,1,7,0.017643199861049653
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,2,7,0.01621119976043701
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,4,7,0.016310399770736693
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,8,7,0.015956799685955047
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,16,7,0.01605599969625473
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,32,7,0.015974399447441102
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,128,1,64,7,0.015807999670505522
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,1,7,0.024982400238513947
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,2,7,0.023099200427532197
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,4,7,0.02316959947347641
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,8,7,0.02333440035581589
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,16,7,0.023366400599479677
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,32,7,0.023078399896621703
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,128,1,64,7,0.02285439968109131
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,1,15,0.017902399599552154
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,2,15,0.016363200545310975
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,4,15,0.01629280000925064
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,8,15,0.0161423996090889
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,16,15,0.016411200165748596
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,32,15,0.015964800119400026
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,128,1,64,15,0.016387200355529784
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,1,15,0.02696160078048706
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,2,15,0.025731199979782106
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,4,15,0.02561439871788025
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,8,15,0.025286400318145753
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,16,15,0.025196799635887147
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,32,15,0.02502079904079437
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,128,1,64,15,0.025305598974227905
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,1,31,0.018537600338459016
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,2,31,0.01666879951953888
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,4,31,0.016025599837303162
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,8,31,0.016422399878501893
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,16,31,0.01611039936542511
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,32,31,0.015913599729537965
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,128,1,64,31,0.015996800363063814
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,1,31,0.03025279939174652
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,2,31,0.027758398652076723
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,2,63,0.017195199429988862
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,4,31,0.027856001257896425
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,8,31,0.0275983989238739
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,16,31,0.02799679934978485
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,32,31,0.0272816002368927
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,128,1,64,31,0.027932798862457274
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,1,63,0.019249600172042847
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,4,63,0.016574400663375854
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,8,63,0.017100800573825837
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,16,63,0.017054399847984313
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,32,63,0.017008000612258913
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,128,1,64,63,0.016891199350357055
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,1,63,0.038657599687576295
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,2,63,0.03473120033740997
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,4,63,0.034513598680496214
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,8,63,0.03404960036277771
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,16,63,0.0343392014503479
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,32,63,0.03390240073204041
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,128,1,64,63,0.03443840146064758
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,1,127,0.022811199724674224
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,2,127,0.019568000733852387
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,4,127,0.019358399510383605
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,8,127,0.018624000251293182
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,16,127,0.018916800618171692
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,32,127,0.0188400000333786
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,128,1,64,127,0.019739200174808503
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,1,127,0.058575999736785886
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,2,127,0.053774398565292356
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,4,127,0.05117120146751404
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,2,255,0.03115360140800476
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,8,127,0.049465599656105044
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,16,127,0.0499103993177414
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,32,127,0.04902719855308533
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,128,1,64,127,0.049620801210403444
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,1,255,0.034727999567985536
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,4,255,0.026416000723838807
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,8,255,0.02423039972782135
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,16,255,0.02388159930706024
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,32,255,0.023367999494075774
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,128,1,64,255,0.023758399486541747
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,1,255,0.09494720101356506
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,2,255,0.09139519929885864
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,4,255,0.09024959802627563
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,8,255,0.08926240205764771
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,16,255,0.08887199759483337
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,32,255,0.0893392026424408
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,32,511,0.04407680034637451
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,128,1,64,255,0.08888959884643555
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,1,511,0.05325440168380737
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,1,511,0.17229599952697755
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,2,511,0.048440000414848326
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,4,511,0.04639999866485596
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,8,511,0.04500640034675598
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,16,511,0.16539520025253296
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,16,511,0.044582399725914004
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,128,1,64,511,0.04503520131111145
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,2,511,0.16815999746322632
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,4,511,0.16694079637527465
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,8,511,0.16474720239639282
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,32,511,0.16525280475616455
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,128,1,64,511,0.16497119665145873
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,1,1023,0.07822239995002747
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,2,1023,0.07324479818344116
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,4,1023,0.07206400036811829
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,1,1023,0.31119039058685305
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,8,1023,0.07028800249099731
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,16,1023,0.0701856017112732
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,32,1023,0.06876800060272217
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,128,1,64,1023,0.07076799869537354
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,2,1023,0.3077327966690063
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,4,1023,0.30505759716033937
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,8,1023,0.3061568021774292
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,16,1023,0.3047823905944824
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,32,1023,0.30444960594177245
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,128,1,64,1023,0.30553278923034666
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,32,2047,0.11960480213165284
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,1,2047,0.12847360372543334
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,2,2047,0.12313439846038818
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,4,2047,0.12153120040893554
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,8,2047,0.11978880167007447
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,16,2047,0.12083519697189331
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,128,1,64,2047,0.12004799842834472
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,1,2047,0.5872528076171875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,2,2047,0.580836820602417
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,4,2047,0.5789904117584228
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,8,2047,0.5785632133483887
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,16,2047,0.5785056114196777
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,32,2047,0.5772575855255127
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,128,1,64,2047,0.5800992012023926
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,1,4095,0.22792320251464843
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,2,4095,0.2228480100631714
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,4,4095,0.22036640644073485
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,8,4095,0.218887996673584
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,16,4095,0.21776959896087647
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,32,4095,0.21805601119995116
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,128,1,64,4095,0.2169424057006836
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,1,4095,1.1344976425170898
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,2,4095,1.1264287948608398
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,4,4095,1.1286992073059081
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,8,4095,1.1235055923461914
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,16,4095,1.1235808372497558
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,1,8191,0.42754402160644533
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,32,4095,1.1230416297912598
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,128,1,64,4095,1.1232208251953124
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,2,8191,0.42811999320983884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,4,8191,0.41948637962341306
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,8,8191,0.4176943778991699
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,16,8191,0.41624960899353025
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,32,8191,0.41609759330749513
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,128,1,64,8191,0.41553440093994143
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,1,8191,2.2230384826660154
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,2,8191,2.218707275390625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,1,16383,0.822913646697998
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,4,8191,2.210723114013672
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,8,8191,2.212006378173828
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,16,8191,2.2118608474731447
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,32,8191,2.2139392852783204
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,128,1,64,8191,2.205735969543457
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,2,16383,0.8182559967041015
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,4,16383,0.8148655891418457
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,8,16383,0.8132431983947754
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,16,16383,0.8130687713623047
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,32,16383,0.8121232032775879
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,128,1,64,16383,0.8137455940246582
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,1,16383,4.388051223754883
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,2,16383,4.385670471191406
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,4,16383,4.405593490600586
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,8,16383,4.3796638488769535
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,16,16383,4.374062347412109
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,1,1,0.028207999467849732
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,32,16383,4.378852844238281
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,128,1,64,16383,4.378472137451172
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,2,1,0.02356960028409958
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,4,1,0.023686400055885314
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,32,1,0.022819200158119203
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,8,1,0.022574399411678315
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,16,1,0.022731199860572815
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,256,1,64,1,0.022835199534893037
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,1,1,0.037411201000213626
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,2,1,0.030763199925422667
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,4,1,0.03009760081768036
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,8,1,0.029446399211883544
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,8,3,0.02252320051193237
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,16,1,0.0292959988117218
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,256,1,64,1,0.02926880121231079
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,32,1,0.029676800966262816
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,1,3,0.027551999688148497
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,2,3,0.023455999791622162
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,4,3,0.022935999929904936
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,16,3,0.023158399760723113
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,32,3,0.02298559993505478
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,256,1,64,3,0.02247840017080307
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,1,3,0.03697920143604279
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,2,3,0.030935999751091004
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,4,3,0.030372801423072814
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,8,3,0.030033600330352784
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,16,3,0.029899200797080992
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,32,3,0.030096000432968138
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,256,1,64,3,0.030339199304580688
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,1,7,0.02831679880619049
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,2,7,0.023479999601840974
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,4,7,0.023193599283695222
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,8,7,0.02313600033521652
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,16,7,0.023051199316978455
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,32,7,0.02282879948616028
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,256,1,64,7,0.022635200619697572
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,1,7,0.040033599734306334
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,2,7,0.03304480016231537
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,4,7,0.03211199939250946
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,8,7,0.03223040103912354
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,16,7,0.03204959928989411
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,32,7,0.03207840025424957
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,256,1,64,7,0.031414398550987245
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,1,15,0.02873600125312805
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,2,15,0.023982399702072145
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,4,15,0.023401600122451783
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,8,15,0.023057599365711213
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,8,15,0.03503200113773346
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,16,15,0.023193599283695222
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,32,15,0.023019200563430785
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,256,1,64,15,0.022811199724674224
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,1,15,0.045510399341583255
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,2,15,0.03585920035839081
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,4,15,0.03522239923477173
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,16,15,0.034756800532341
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,32,15,0.034760001301765445
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,256,1,64,15,0.03438239991664886
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,1,31,0.03136320114135742
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,2,31,0.023871999979019166
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,4,31,0.023387199640274046
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,8,31,0.023563200235366823
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,16,31,0.022835199534893037
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,32,31,0.023035199940204622
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,256,1,64,31,0.022856000065803527
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,1,31,0.053339201211929324
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,2,31,0.04349919855594635
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,4,31,0.04130719900131226
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,8,31,0.04071359932422638
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,16,31,0.04072799980640411
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,32,31,0.04016799926757812
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,256,1,64,31,0.04022080004215241
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,1,63,0.03770079910755157
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,2,63,0.02619520127773285
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,4,63,0.0248416006565094
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,8,63,0.02489120066165924
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,16,63,0.023787200450897217
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,32,63,0.024355199933052064
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,256,1,64,63,0.0239439994096756
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,1,63,0.07047520279884338
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,2,63,0.06345120072364807
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,4,63,0.058340799808502194
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,8,63,0.05608479976654053
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,16,63,0.055534398555755614
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,32,63,0.05446079969406128
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,256,1,64,63,0.054073601961135864
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,1,127,0.044065600633621214
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,2,127,0.03945119976997376
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,4,127,0.03621920049190521
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,8,127,0.03400000035762787
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,16,127,0.03020159900188446
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,32,127,0.029203200340270997
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,256,1,64,127,0.029096001386642457
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,16,127,0.09664319753646851
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,1,127,0.10813280344009399
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,2,127,0.10204639434814453
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,4,127,0.09952960014343262
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,8,127,0.09728959798812867
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,32,127,0.09630399942398071
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,256,1,64,127,0.09632639884948731
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,1,255,0.058727997541427615
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,2,255,0.05232959985733032
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,4,255,0.05050399899482727
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,8,255,0.04909760057926178
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,16,255,0.048286399245262145
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,32,255,0.0468639999628067
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,256,1,64,255,0.047142401337623596
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,1,255,0.1789695978164673
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,2,255,0.17371679544448854
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,4,255,0.17137759923934937
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,8,255,0.17140640020370485
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,16,255,0.16894880533218384
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,32,255,0.16939040422439575
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,256,1,64,255,0.17048959732055663
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,1,511,0.09490399956703185
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,2,511,0.08469920158386231
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,4,511,0.07979840040206909
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,8,511,0.07755680084228515
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,16,511,0.07656959891319275
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,32,511,0.07633280158042907
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,256,1,64,511,0.07584159970283508
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,1,511,0.3295183897018433
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,2,511,0.32008159160614014
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,4,511,0.31541440486907957
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,8,511,0.3144383907318115
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,2,1023,0.1341920018196106
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,16,511,0.31502881050109866
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,32,511,0.3132560014724731
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,256,1,64,511,0.31371519565582273
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,1,1023,0.14324159622192384
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,4,1023,0.12974720001220702
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,8,1023,0.12609599828720092
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,16,1023,0.12520480155944824
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,32,1023,0.12479519844055176
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,256,1,64,1023,0.12489279508590698
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,1,1023,0.6014416217803955
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,2,1023,0.5933519840240479
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,4,1023,0.5883359909057617
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,8,1023,0.5860832214355469
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,16,1023,0.5849152088165284
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,32,1023,0.5847487926483155
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,1,2047,0.24158720970153807
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,256,1,64,1023,0.5869823932647705
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,2,2047,0.23256320953369142
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,4,2047,0.22804160118103028
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,8,2047,0.22527360916137695
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,16,2047,0.22439839839935302
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,32,2047,0.22344160079956055
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,256,1,64,2047,0.22423040866851807
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,1,2047,1.144968032836914
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,2,2047,1.1374575614929199
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,4,2047,1.1334815979003907
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,8,2047,1.1299839973449708
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,16,2047,1.1306655883789063
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,1,4095,0.44009599685668943
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,32,2047,1.128598403930664
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,256,1,64,2047,1.129748821258545
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,2,4095,0.4321119785308838
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,4,4095,0.4257984161376953
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,8,4095,0.4226111888885498
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,16,4095,0.421127986907959
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,32,4095,0.42034239768981935
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,256,1,64,4095,0.42034077644348145
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,1,4095,2.2358463287353514
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,1,8191,0.8346832275390625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,2,4095,2.226185607910156
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,4,4095,2.2304031372070314
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,8,4095,2.2271888732910154
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,16,4095,2.215291213989258
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,32,4095,2.2178319931030273
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,256,1,64,4095,2.2167728424072264
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,2,8191,0.8623647689819336
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,4,8191,0.8319295883178711
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,8,8191,0.8189743995666504
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,16,8191,0.8153488159179687
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,32,8191,0.814902400970459
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,256,1,64,8191,0.8161711692810059
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,1,1,0.05512639880180359
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,2,1,0.04132960140705109
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,4,1,0.03824320137500763
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,8,1,0.03736799955368042
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,16,1,0.03703359961509704
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,1,8191,4.410739135742188
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,2,8191,4.407358551025391
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,32,1,0.03695839941501618
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,512,1,64,1,0.03710559904575348
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,4,8191,4.3856464385986325
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,8,8191,4.403473663330078
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,16,8191,4.378934478759765
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,32,8191,4.381006240844727
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,1,1,0.06438720226287842
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,256,1,64,8191,4.383665466308594
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,2,1,0.052635198831558226
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,4,1,0.045332801342010495
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,8,1,0.04488480091094971
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,16,1,0.04435999989509583
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,512,1,64,1,0.04424799978733063
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,32,1,0.04445759952068329
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,2,3,0.04156480133533478
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,1,3,0.05552800297737122
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,4,3,0.037940800189971924
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,1,3,0.065447998046875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,8,3,0.037452799081802365
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,16,3,0.03744319975376129
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,32,3,0.03712159991264343
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,512,1,64,3,0.03691680133342743
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,2,3,0.053630399703979495
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,4,3,0.04704639911651611
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,8,3,0.04671519994735718
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,16,3,0.04621759951114655
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,32,3,0.046096000075340274
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,512,1,64,3,0.04592959880828858
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,1,7,0.05431200265884399
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,2,7,0.04166080057621002
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,4,7,0.03800320029258728
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,8,7,0.037723198533058167
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,16,7,0.037302398681640626
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,32,7,0.03707520067691803
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,8,7,0.049399998784065244
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,512,1,64,7,0.036881598830223086
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,1,7,0.0681872010231018
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,2,7,0.05726240277290344
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,4,7,0.050670397281646726
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,16,7,0.04905439913272858
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,32,7,0.04869759976863861
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,512,1,64,7,0.04872959852218628
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,1,15,0.055030399560928346
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,2,15,0.042982399463653564
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,4,15,0.03811199963092804
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,8,15,0.03792159855365753
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,16,15,0.0374752014875412
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,4,15,0.05914400219917297
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,32,15,0.037217599153518674
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,512,1,64,15,0.03742879927158356
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,1,15,0.07575680017471313
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,2,15,0.06570720076560974
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,32,15,0.055006402730941775
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,8,15,0.05636640191078186
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,16,15,0.055195200443267825
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,512,1,64,15,0.0548687994480133
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,1,31,0.05699359774589539
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,2,31,0.04631359875202179
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,4,31,0.038576000928878786
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,8,31,0.0377023994922638
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,16,31,0.037771201133728026
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,32,31,0.03778400123119354
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,512,1,64,31,0.037854400277137754
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,1,31,0.09278079867362976
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,2,31,0.08135520219802857
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,4,31,0.07564160227775574
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,8,31,0.07178720235824584
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,16,31,0.06966400146484375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,32,31,0.06861439943313599
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,512,1,64,31,0.0678384006023407
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,1,63,0.06425759792327881
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,2,63,0.05245440006256104
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,4,63,0.04751519858837128
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,2,63,0.11812479496002197
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,8,63,0.04532319903373718
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,16,63,0.04310239851474762
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,32,63,0.04181599915027619
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,512,1,64,63,0.041116800904273984
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,1,63,0.12762240171432496
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,4,63,0.1126431941986084
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,8,63,0.1090432047843933
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,16,63,0.10757440328598022
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,32,63,0.10646400451660157
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,512,1,64,63,0.10636320114135742
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,1,127,0.07582719922065735
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,2,127,0.06658080220222473
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,4,127,0.06074240207672119
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,8,127,0.05819839835166931
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,16,127,0.05597280263900757
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,32,127,0.05567039847373963
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,512,1,64,127,0.05581600069999695
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,1,127,0.1968224048614502
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,2,127,0.18687839508056642
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,4,127,0.18278559446334838
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,8,127,0.17943520545959474
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,16,127,0.1774943947792053
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,32,127,0.1775871992111206
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,512,1,64,127,0.1780608057975769
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,1,255,0.10118399858474732
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,2,255,0.09113280177116394
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,4,255,0.08658559918403626
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,8,255,0.08530719876289368
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,16,255,0.08263840079307556
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,32,255,0.08154079914093018
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,512,1,64,255,0.08244799971580505
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,1,255,0.33466079235076907
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,2,255,0.3236991882324219
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,4,255,0.3193631887435913
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,8,255,0.316811203956604
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,16,255,0.3147023916244507
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,32,255,0.31467840671539304
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,512,1,64,255,0.31528639793395996
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,1,511,0.17350560426712036
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,2,511,0.15515999794006347
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,4,511,0.1465407967567444
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,8,511,0.14171680212020873
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,16,511,0.13965760469436644
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,32,511,0.13786239624023439
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,512,1,64,511,0.13848960399627686
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,1,511,0.6320032119750977
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,2,511,0.6138912200927734
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,4,511,0.6053823947906494
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,8,511,0.6004447937011719
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,16,511,0.5990928173065185
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,32,511,0.5977856159210205
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,512,1,64,511,0.596889591217041
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,1,1023,0.26916320323944093
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,2,1023,0.2525023937225342
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,4,1023,0.2439471960067749
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,8,1023,0.23879680633544922
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,16,1023,0.23595359325408935
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,32,1023,0.23385438919067383
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,512,1,64,1023,0.2346031904220581
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,1,1023,1.1735103607177735
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,2,1023,1.1568320274353028
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,4,1023,1.1488431930541991
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,16,1023,1.141273593902588
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,8,1023,1.1473039627075194
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,1,2047,0.46884641647338865
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,32,1023,1.1393263816833497
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,512,1,64,1023,1.138907241821289
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,4,2047,0.4394527912139893
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,2,2047,0.44957599639892576
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,8,2047,0.4347424030303955
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,16,2047,0.432692813873291
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,32,2047,0.43114399909973145
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,512,1,64,2047,0.43178400993347166
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,1,2047,2.2618160247802734
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,1,4095,0.8807024002075196
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,2,2047,2.2456560134887695
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,4,2047,2.2355247497558595
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,8,2047,2.2309423446655274
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,16,2047,2.2224704742431642
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,32,2047,2.2259632110595704
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,512,1,64,2047,2.221899223327637
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,4,4095,0.8553983688354492
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,8,4095,0.8274288177490234
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,2,4095,0.8558704376220703
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,16,4095,0.8245072364807129
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,32,4095,0.824227237701416
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,512,1,64,4095,0.8226400375366211
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,1,1,0.09377440214157104
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,2,1,0.07492160201072692
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,4,1,0.06636800169944763
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,8,1,0.061353600025177
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,16,1,0.06115520000457764
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,32,1,0.06098719835281372
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1024,1,64,1,0.06096000075340271
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,1,1,0.1018720030784607
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,2,1,0.08643040060997009
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,1,4095,4.443552017211914
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,2,4095,4.411494445800781
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,4,1,0.07838559746742249
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,32,1,0.07009919881820678
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,8,1,0.07154719829559326
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,16,1,0.07047359943389893
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,4,4095,4.40191535949707
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1024,1,64,1,0.07011680006980896
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,8,4095,4.390115356445312
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,16,4095,4.389929580688476
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,1,3,0.09550399780273437
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,32,4095,4.374636840820313
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,2,3,0.07510719895362854
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,32,3,0.061289602518081666
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,512,1,64,4095,4.383107376098633
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,8,3,0.061504000425338747
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,4,3,0.06658560037612915
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,16,3,0.06089760065078735
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1024,1,64,3,0.06082080006599426
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,1,3,0.11075999736785888
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,4,3,0.0815392017364502
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,2,3,0.08997600078582764
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,16,3,0.07337599992752075
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,32,3,0.0733680009841919
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,8,3,0.0760320007801056
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1024,1,64,3,0.07273920178413391
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,1,7,0.09380000233650207
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,2,7,0.07592800259590149
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,4,7,0.06605439782142639
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,8,7,0.06173920035362244
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,16,7,0.06110720038414001
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,32,7,0.0608784019947052
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1024,1,64,7,0.061264002323150636
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,1,7,0.11474239826202393
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,2,7,0.0977183997631073
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1024,1,64,7,0.07845439910888671
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,4,7,0.08947200179100037
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,8,7,0.08222879767417908
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,16,7,0.07934719920158387
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,32,7,0.07928320169448852
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,1,15,0.09568319916725158
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,2,15,0.0772704005241394
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,4,15,0.06766719818115234
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,8,15,0.06190239787101746
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,16,15,0.061982399225234984
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,4,15,0.10351999998092651
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,32,15,0.061244797706604
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1024,1,64,15,0.06122879981994629
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,1,15,0.1334383964538574
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,2,15,0.11382559537887574
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,8,15,0.10040639638900757
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,16,15,0.0973088026046753
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,32,15,0.09474400281906128
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1024,1,64,15,0.09253119826316833
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,1,31,0.10029120445251465
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,2,31,0.07961120009422303
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,4,31,0.07048479914665222
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,8,31,0.06596480011940002
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,16,31,0.06262720227241517
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,32,31,0.06140959858894348
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1024,1,64,31,0.0611952006816864
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,16,31,0.12889599800109863
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,1,31,0.16468000411987305
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,2,31,0.14530240297317504
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,4,31,0.13573280572891236
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,8,31,0.1311776041984558
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,32,31,0.1282639980316162
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1024,1,64,31,0.127729594707489
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,1,63,0.10905120372772217
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,2,63,0.08650239706039428
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,4,63,0.07853279709815979
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,8,63,0.07468640208244323
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,16,63,0.07284799814224244
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,32,63,0.07211840152740479
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1024,1,64,63,0.0714303970336914
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,1,63,0.23067998886108398
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,2,63,0.2080143928527832
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,4,63,0.20028159618377686
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,8,63,0.19539040327072144
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,16,63,0.19408960342407228
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,32,63,0.1940719962120056
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1024,1,64,63,0.19245120286941528
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,1,127,0.13246079683303832
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,2,127,0.11023679971694947
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,4,127,0.10109280347824097
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,8,127,0.09446560144424439
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,16,127,0.0922160029411316
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,32,127,0.09184799790382385
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1024,1,64,127,0.09135519862174987
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,1,127,0.3663343906402588
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,2,127,0.3455424070358276
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,4,127,0.3337104082107544
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,8,127,0.32830080986022947
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,16,127,0.32549760341644285
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,32,127,0.32527360916137693
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1024,1,64,127,0.32453598976135256
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,1,255,0.18209120035171508
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,2,255,0.16033439636230468
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,4,255,0.14950079917907716
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,8,255,0.14428160190582276
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,16,255,0.14273120164871217
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,32,255,0.1408735990524292
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1024,1,64,255,0.14137439727783202
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,1,255,0.6376751899719239
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,2,255,0.6189568042755127
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,4,255,0.6045936107635498
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,8,255,0.6004303932189942
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,16,255,0.5976943969726562
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,32,255,0.5968927860260009
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1024,1,64,255,0.5960000038146973
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,1,511,0.31726880073547364
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,2,511,0.28162078857421874
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,4,511,0.2641999959945679
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,8,511,0.2556063890457153
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,16,511,0.25238239765167236
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,32,511,0.25023200511932375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1024,1,64,511,0.25169761180877687
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,1,511,1.2225935935974122
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,2,511,1.1837599754333497
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,4,511,1.1670144081115723
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,8,511,1.156595230102539
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,16,511,1.157529640197754
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,1,1023,0.5203760147094727
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,32,511,1.156060791015625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1024,1,64,511,1.155459213256836
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,4,1023,0.46341919898986816
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,2,1023,0.48233919143676757
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,8,1023,0.4539423942565918
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,16,1023,0.44913439750671386
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,32,1023,0.44614877700805666
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1024,1,64,1023,0.445684814453125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,4,1023,2.2593711853027343
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,2,1023,2.2776815414428713
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,1,1023,2.3139711380004884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,8,1023,2.248921585083008
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,16,1023,2.238140869140625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,32,1023,2.2281824111938477
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1024,1,64,1023,2.235095977783203
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,1,2047,0.9507951736450195
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,2,2047,0.8908736228942871
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,4,2047,0.8641551971435547
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,8,2047,0.8475456237792969
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,16,2047,0.8407999992370605
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,32,2047,0.8380975723266602
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,1,1024,1,64,2047,0.8365903854370117
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1,1,1,1,0.01358560025691986
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,2,1,0.013099199533462525
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,4,1,0.012641599774360657
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,2,2047,4.435039901733399
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,1,2047,4.510512161254883
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,8,2047,4.412432098388672
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,4,2047,4.423409652709961
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,16,2047,4.414031982421875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,32,2047,4.393316650390625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,8,1,0.012491200119256973
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,1,1024,1,64,2047,4.403777694702148
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1,1,1,1,0.019713599979877473
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,16,1,0.012153600156307221
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,32,1,0.012387199699878693
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,64,1,0.012201599776744843
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,2,1,0.019419200718402863
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,8,1,0.01908479928970337
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,4,1,0.019230400025844575
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,32,1,0.01879200041294098
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,16,1,0.018803200125694274
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,64,1,0.018731200695037843
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1,1,1,3,0.013516800105571746
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,2,3,0.012895999848842621
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,4,3,0.012606400251388549
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,8,3,0.012455999851226807
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,16,3,0.012358400225639343
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,32,3,0.012318400293588638
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,64,3,0.012206400185823441
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,2,3,0.019702400267124175
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1,1,1,3,0.0198512002825737
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,4,3,0.019068799912929535
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,8,3,0.0189423993229866
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,16,3,0.01881760060787201
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,32,3,0.018931199610233308
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,64,3,0.01873279958963394
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,2,7,0.013124799728393555
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1,1,1,7,0.013371199369430542
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,4,7,0.012759999930858612
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,8,7,0.01250240057706833
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,16,7,0.012240000069141388
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,32,7,0.012451200187206269
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,64,7,0.012406399846076966
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1,1,1,7,0.01987839937210083
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,2,7,0.019630399346351624
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,4,7,0.019233599305152893
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,8,7,0.018907199800014495
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,16,7,0.0189968004822731
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,32,7,0.018796800076961516
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,64,7,0.018828800320625304
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1,1,1,15,0.01358879953622818
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,2,15,0.013014400005340576
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,4,15,0.012736000120639801
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,8,15,0.012460800260305405
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,16,15,0.012436799705028534
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,32,15,0.012495999783277511
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,64,15,0.012363199889659882
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1,1,1,15,0.019971199333667755
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,2,15,0.019721600413322448
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1,1,1,31,0.013555200397968292
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,4,15,0.019180800020694732
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,4,31,0.012833599746227265
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,8,15,0.019006399810314177
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,16,15,0.0189423993229866
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,32,15,0.019054399430751802
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,64,15,0.019038400053977965
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,2,31,0.013116799294948578
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,8,31,0.012585599720478059
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,16,31,0.0123648002743721
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,32,31,0.012451200187206269
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,64,31,0.012353599816560746
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1,1,1,31,0.02001280039548874
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,2,31,0.019713599979877473
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,4,31,0.019431999325752257
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,8,31,0.01894879937171936
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,16,31,0.018833599984645844
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,32,31,0.019073599576950075
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,64,31,0.0189968004822731
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1,1,1,63,0.01348160058259964
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,2,63,0.013147200644016265
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,4,63,0.012736000120639801
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,8,63,0.012703999876976013
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,16,63,0.012366399914026261
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,32,63,0.012403199821710587
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,64,63,0.012432000041007996
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1,1,1,63,0.020259200036525725
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,2,63,0.019644799828529357
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,4,63,0.019307200610637665
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,8,63,0.01921440064907074
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,16,63,0.019065600633621217
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,32,63,0.01902720034122467
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,64,63,0.01897439956665039
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1,1,1,127,0.015039999783039094
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,2,127,0.015025599300861359
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,4,127,0.014455999433994293
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,8,127,0.014412799477577209
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,16,127,0.014185599982738495
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,32,127,0.014350399374961853
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,64,127,0.014207999408245086
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1,1,1,127,0.021638399362564086
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,2,127,0.02157119959592819
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,4,127,0.020873600244522096
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,8,127,0.021028800308704375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,16,127,0.020848000049591066
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,32,127,0.02083200067281723
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,64,127,0.020787200331687926
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1,1,1,255,0.0181536003947258
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,2,255,0.018087999522686006
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,4,255,0.017428800463676453
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,8,255,0.017433600127696992
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,16,255,0.017364799976348877
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,32,255,0.01733279973268509
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,64,255,0.017387199401855468
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1,1,1,255,0.02476799935102463
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,2,255,0.024617600440979003
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,4,255,0.023998400568962096
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,8,255,0.02396160066127777
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,16,255,0.023887999355793
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,32,255,0.023835200071334838
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,64,255,0.023756800591945647
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1,1,1,511,0.019531199336051942
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,2,511,0.019096000492572783
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,4,511,0.01786559969186783
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,8,511,0.017155200242996216
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,16,511,0.016780799627304076
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,32,511,0.01788959950208664
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,64,511,0.018143999576568603
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1,1,1,511,0.026612800359725953
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,2,511,0.02603999972343445
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,4,511,0.024747200310230255
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,8,511,0.024132800102233887
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,16,511,0.023603199422359465
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,32,511,0.025012800097465517
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,64,511,0.025007998943328856
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1,1,1,1023,0.02008640021085739
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,2,1023,0.019387200474739075
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,4,1023,0.018188799917697906
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,8,1023,0.017459200322628023
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,16,1023,0.017078399658203125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,32,1023,0.017892800271511078
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,64,1023,0.018111999332904815
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1,1,1,1023,0.02876960039138794
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,2,1023,0.026704001426696777
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,4,1023,0.025467199087142945
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,8,1023,0.02465600073337555
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,16,1023,0.02467840015888214
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,32,1023,0.02534399926662445
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,64,1023,0.025648000836372375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1,1,1,2047,0.022225600481033326
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,2,2047,0.020287999510765077
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,4,2047,0.01980479955673218
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,8,2047,0.018110400438308714
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,16,2047,0.01767839938402176
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,32,2047,0.01844480037689209
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,64,2047,0.018367999792099
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1,1,1,2047,0.03100320100784302
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,2,2047,0.02953599989414215
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1,1,1,4095,0.02378080040216446
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,4,2047,0.028284800052642823
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,8,2047,0.027166399359703063
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,16,2047,0.027062401175498962
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,32,2047,0.027486398816108704
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,64,2047,0.027529600262641906
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,2,4095,0.022998400032520294
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,4,4095,0.020870399475097657
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,8,4095,0.020000000298023225
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,16,4095,0.019750399887561797
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,32,4095,0.020750400424003602
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,64,4095,0.02099200040102005
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,2,4095,0.034908801317214966
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1,1,1,4095,0.03559519946575165
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,4,4095,0.032969599962234496
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,8,4095,0.03222880065441132
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,16,4095,0.031632000207901
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,32,4095,0.0324752002954483
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,64,4095,0.032872000336647035
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1,1,1,8191,0.026878398656845093
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,2,8191,0.02555519938468933
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,4,8191,0.023849600553512575
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,8,8191,0.02213920056819916
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,16,8191,0.022152000665664674
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,8,8191,0.04084480106830597
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,32,8191,0.02369759976863861
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,64,8191,0.024086399376392363
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1,1,1,8191,0.04760479927062988
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,2,8191,0.04475040137767792
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,4,8191,0.042017599940299986
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,16,8191,0.0401632010936737
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,32,8191,0.042467200756073
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,64,8191,0.042377600073814393
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1,1,1,16383,0.032278400659561154
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,2,16383,0.029951998591423036
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,4,16383,0.026830399036407472
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,8,16383,0.02651839852333069
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,16,16383,0.025753599405288697
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,32,16383,0.025968000292778015
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,64,16383,0.026047998666763307
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1,1,1,16383,0.06855840086936951
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,2,16383,0.06734240055084229
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,4,16383,0.06388319730758667
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,8,16383,0.058555197715759275
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,16,16383,0.058166402578353885
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,32,16383,0.059543997049331665
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,64,16383,0.05762879848480225
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1,1,1,32767,0.04342080056667328
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,2,32767,0.04266720116138458
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,4,32767,0.0383103996515274
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1,1,1,32767,0.10674079656600952
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,8,32767,0.03293760120868683
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,16,32767,0.03136799931526184
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,32,32767,0.03136000037193298
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,16,32767,0.09852960109710693
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,64,32767,0.03135519921779632
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,2,32767,0.10287679433822632
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,4,32767,0.09988319873809814
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,8,32767,0.09778720140457153
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,32,32767,0.09963840246200562
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,64,32767,0.09964479804039002
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1,1,1,65535,0.05948320031166077
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,2,65535,0.06077600121498108
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,4,65535,0.05324159860610962
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,8,65535,0.051976001262664794
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,16,65535,0.05231040120124817
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,32,65535,0.05148959755897522
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,64,65535,0.051795202493667605
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1,1,1,65535,0.18072160482406616
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,2,65535,0.17715200185775756
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,4,65535,0.17395360469818116
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,8,65535,0.17261439561843872
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1,1,2,131071,0.0880832016468048
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,16,65535,0.17211999893188476
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,32,65535,0.1723039984703064
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,64,65535,0.17248640060424805
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1,1,1,131071,0.08821920156478882
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1,1,4,131071,0.08291680216789246
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1,1,8,131071,0.07926719784736633
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1,1,16,131071,0.07796480059623719
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1,1,32,131071,0.07795040011405945
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1,1,64,131071,0.07719200253486633
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1,1,1,131071,0.3238784074783325
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1,1,2,131071,0.32076320648193357
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1,1,4,131071,0.316483211517334
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1,1,8,131071,0.31398561000823977
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1,1,16,131071,0.3134335994720459
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,2,1,1,1,0.013699199259281158
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1,1,32,131071,0.31311678886413574
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,2,1,0.013084800541400909
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1,1,64,131071,0.3141279935836792
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,4,1,0.012745599448680877
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,8,1,0.012375999987125397
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,16,1,0.01263359934091568
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,32,1,0.012385600060224534
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,64,1,0.012491200119256973
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,2,1,1,1,0.019952000677585603
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,2,1,0.019489599764347075
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,4,1,0.01900320053100586
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,8,1,0.01881600022315979
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,16,1,0.01865919977426529
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,32,1,0.01871519982814789
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,64,1,0.018572799861431122
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,2,1,1,3,0.01361439973115921
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,2,3,0.013199999928474426
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,4,3,0.012567999958992004
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,8,3,0.012449599802494049
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,16,3,0.012406399846076966
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,32,3,0.012263999879360199
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,64,3,0.0123648002743721
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,2,1,1,3,0.019952000677585603
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,2,3,0.0195375993847847
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,4,3,0.018900799751281738
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,8,3,0.01870400011539459
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,16,3,0.018705600500106813
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,32,3,0.01857919991016388
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,64,3,0.01858240067958832
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,2,1,1,7,0.013620799779891968
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,2,7,0.013142399489879608
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,4,7,0.012798400223255157
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,8,7,0.01255200058221817
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,16,7,0.012361600250005721
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,32,7,0.01233920007944107
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,64,7,0.012411200255155564
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,2,1,1,7,0.020043200254440306
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,2,7,0.01950239986181259
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,2,1,1,15,0.013558399677276612
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,4,7,0.0189423993229866
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,8,7,0.018824000656604768
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,16,7,0.018614399433135986
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,32,7,0.01873439997434616
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,64,7,0.018464000523090364
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,2,15,0.013096000254154205
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,4,15,0.012718400359153748
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,8,15,0.012668800354003907
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,16,15,0.012439999729394913
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,32,15,0.01244800016283989
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,64,15,0.0123648002743721
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,2,1,1,15,0.019838400185108185
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,2,15,0.019364799559116363
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,4,15,0.018796800076961516
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,8,15,0.01879040002822876
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,16,15,0.018726399540901183
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,32,15,0.018688000738620758
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,64,15,0.018785600364208222
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,2,1,1,31,0.013494400680065155
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,2,31,0.013027200102806091
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,4,31,0.012667199969291687
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,8,31,0.012643200159072877
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,16,31,0.012462399899959564
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,32,31,0.01255200058221817
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,64,31,0.012399999797344208
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,2,1,1,31,0.01987999975681305
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,2,31,0.01947840005159378
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,4,31,0.018873600661754607
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,8,31,0.018935999274253844
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,16,31,0.01870879977941513
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,32,31,0.01871040016412735
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,64,31,0.018760000169277192
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,2,1,1,63,0.013553600013256072
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,2,63,0.013193599879741669
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,4,63,0.012833599746227265
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,8,63,0.012604799866676331
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,16,63,0.012561599910259246
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,32,63,0.012459199875593185
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,64,63,0.012409599870443344
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,2,1,1,63,0.019952000677585603
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,2,63,0.019494399428367615
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,4,63,0.018915200233459474
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,8,63,0.019046400487422944
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,16,63,0.018908800184726716
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,32,63,0.018908800184726716
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,64,63,0.01876640021800995
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,2,1,1,127,0.015225599706172942
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,2,127,0.014860799908638
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,4,127,0.014454400539398194
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,8,127,0.014417600631713868
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,16,127,0.014238399267196656
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,16,127,0.02054080069065094
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,32,127,0.014153599739074707
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,64,127,0.014220799505710601
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,2,1,1,127,0.021515199542045595
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,2,127,0.021206399798393248
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,4,127,0.020644800364971162
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,8,127,0.02051520049571991
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,32,127,0.020431999862194062
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,64,127,0.020423999428749083
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,2,1,1,255,0.018254399299621582
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,2,255,0.018031999468803406
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,4,255,0.017633600533008574
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,8,255,0.017131200432777403
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,16,255,0.017272000014781953
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,32,255,0.017308799922466277
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,64,255,0.017097599804401398
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,2,1,1,255,0.024724799394607543
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,2,255,0.02448800057172775
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,4,255,0.024006399512290954
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,8,255,0.023756800591945647
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,16,255,0.02359199970960617
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,32,255,0.02362399995326996
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,64,255,0.023708799481391908
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,2,1,1,511,0.019518400728702544
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,2,511,0.019126400351524353
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,4,511,0.01814880073070526
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,8,511,0.017262400686740877
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,16,511,0.017067199945449828
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,32,511,0.01791999936103821
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,64,511,0.018225599825382233
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,2,1,1,511,0.027088001370429993
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,2,511,0.026387199759483337
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,4,511,0.025094398856163026
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,8,511,0.02444159984588623
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,16,511,0.02406879961490631
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,32,511,0.025169599056243896
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,64,511,0.025228801369667053
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,2,1,1,1023,0.020451200008392335
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,2,1023,0.019707199931144715
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,4,1023,0.018265600502490997
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,8,1023,0.017744000256061553
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,16,1023,0.017367999255657195
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,32,1023,0.018111999332904815
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,64,1023,0.018449600040912627
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,2,1,1,1023,0.030219200253486633
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,2,1023,0.02887359857559204
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,4,1023,0.02748799920082092
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,8,1023,0.026500800251960756
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,16,1023,0.026240000128746034
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,32,1023,0.026956799626350402
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,64,1023,0.02720159888267517
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,2,1,1,2047,0.022436800599098205
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,2,2047,0.02067999988794327
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,4,2047,0.019406400620937347
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,8,2047,0.018643200397491455
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,16,2047,0.018345600366592406
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,32,2047,0.01844799965620041
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,64,2047,0.018727999925613404
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,2,1,1,2047,0.035087999701499936
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,2,2047,0.033251199126243594
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,4,2047,0.031204798817634584
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,8,2047,0.03052319884300232
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,16,2047,0.030163198709487915
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,32,2047,0.030209600925445557
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,64,2047,0.030593600869178773
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,2,1,1,4095,0.02592639923095703
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,2,4095,0.02393440008163452
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,4,4095,0.02128639966249466
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,8,4095,0.020448000729084016
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,16,4095,0.019945600628852846
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,32,4095,0.020534400641918183
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,64,4095,0.020934399962425233
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,2,1,1,4095,0.04600160121917725
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,2,4095,0.041791999340057374
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,4,4095,0.039052799344062805
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,8,4095,0.03848319947719574
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,16,4095,0.03808160126209259
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,32,4095,0.03840320110321045
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,64,4095,0.039083200693130496
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,2,1,1,8191,0.03099679946899414
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,32,8191,0.02534559965133667
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,2,8191,0.027828800678253173
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,4,8191,0.024611200392246246
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,8,8191,0.023393599689006804
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,16,8191,0.022644799947738648
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,64,8191,0.02619360089302063
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,2,1,1,8191,0.06707040071487427
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,2,8191,0.06378239989280701
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,4,8191,0.05882880091667175
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,8,8191,0.05662879943847656
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,16,8191,0.05459039807319641
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,32,8191,0.05614240169525146
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,64,8191,0.05720959901809693
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,2,1,1,16383,0.04267840087413788
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,2,16383,0.03928639888763428
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,4,16383,0.03381919860839844
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,2,16383,0.10070079565048218
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,8,16383,0.02977119982242584
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,16,16383,0.028089600801467895
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,32,16383,0.030019199848175047
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,64,16383,0.030115199089050294
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,2,1,1,16383,0.10509439706802368
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,4,16383,0.09689599871635438
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,8,16383,0.09599519968032837
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,16,16383,0.09536160230636596
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,32,16383,0.09753919839859009
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,64,16383,0.09758560061454773
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,2,1,1,32767,0.058233600854873654
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,2,32767,0.055163198709487916
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,4,32767,0.05118079781532288
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,8,32767,0.04917120039463043
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,16,32767,0.04787839949131012
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,32,32767,0.049163201451301576
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,64,32767,0.04986400008201599
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,2,1,1,32767,0.17991039752960206
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,2,32767,0.175108802318573
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,4,32767,0.17069439888000487
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,8,32767,0.16974400281906127
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,16,32767,0.16870239973068238
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,32,32767,0.17033599615097045
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,64,32767,0.17144639492034913
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,2,1,1,65535,0.08672800064086914
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,2,65535,0.08466719985008239
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,4,65535,0.07915840148925782
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,8,65535,0.07647039890289306
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,16,65535,0.07441759705543519
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,32,65535,0.07536960244178773
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,64,65535,0.07631360292434693
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,2,1,1,65535,0.321563196182251
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,2,65535,0.31599841117858884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,64,65535,0.3115695953369141
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,4,65535,0.31214079856872556
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,8,65535,0.3114864110946655
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,2,1,4,131071,0.13077919483184813
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,16,65535,0.3094703912734985
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,32,65535,0.31096639633178713
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,2,1,64,131071,0.12646880149841308
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,2,1,1,131071,0.14552160501480102
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,2,1,2,131071,0.13612639904022217
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,2,1,8,131071,0.1272544026374817
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,2,1,32,131071,0.12678879499435425
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,2,1,16,131071,0.1257312059402466
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,2,1,1,131071,0.599889612197876
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,2,1,2,131071,0.5930736064910889
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,2,1,4,131071,0.5879439830780029
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,2,1,8,131071,0.5844672203063965
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,4,1,1,1,0.013700799643993377
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,2,1,16,131071,0.5856880187988281
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,2,1,0.013364799320697784
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,2,1,32,131071,0.5857920169830322
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,2,1,64,131071,0.5881904125213623
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,4,1,0.012811200320720672
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,8,1,0.012566399574279786
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,16,1,0.01252640038728714
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,32,1,0.012465599924325943
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,64,1,0.012417600303888322
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,4,1,1,1,0.020057600736618043
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,2,1,0.019780799746513367
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,4,1,0.019377599656581878
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,8,1,0.01918399930000305
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,16,1,0.019094400107860565
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,32,1,0.01886080056428909
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,64,1,0.01903039962053299
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,4,1,1,3,0.013758400082588195
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,2,3,0.013254399597644805
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,4,3,0.013131199777126313
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,8,3,0.012401600182056428
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,16,3,0.012529599666595458
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,32,3,0.01244800016283989
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,64,3,0.012379200011491776
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,4,1,1,3,0.02008959949016571
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,2,3,0.019811199605464937
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,4,3,0.019225600361824035
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,8,3,0.01912800073623657
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,16,3,0.019012799859046935
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,32,3,0.018779200315475465
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,64,3,0.01903039962053299
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,4,1,1,7,0.013752000033855438
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,2,7,0.013380800187587739
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,4,7,0.012779200077056884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,8,7,0.01255359947681427
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,16,7,0.012596799433231354
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,32,7,0.01252480000257492
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,64,7,0.012491200119256973
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,4,1,1,7,0.02003999948501587
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,2,7,0.01974399983882904
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,4,7,0.019139200448989868
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,8,7,0.01921280026435852
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,16,7,0.01894560009241104
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,32,7,0.01886560022830963
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,64,7,0.018884800374507904
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,4,1,1,15,0.01366720050573349
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,2,15,0.013249599933624267
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,4,15,0.01279360055923462
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,8,15,0.012537600100040435
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,16,15,0.012561599910259246
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,32,15,0.012531200051307678
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,64,15,0.012454400211572647
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,4,1,1,15,0.02018879950046539
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,2,15,0.019780799746513367
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,4,15,0.01918880045413971
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,8,15,0.019215999543666838
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,16,15,0.019019199907779692
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,32,15,0.018887999653816222
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,64,15,0.018822400271892546
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,4,1,1,31,0.013659200072288514
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,2,31,0.013288000226020813
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,4,31,0.012755200266838074
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,8,31,0.012559999525547028
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,16,31,0.012467200309038163
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,32,31,0.012703999876976013
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,4,1,1,31,0.020027199387550355
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,64,31,0.012651200592517852
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,2,31,0.020113599300384522
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,4,31,0.019270400702953338
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,8,31,0.018966400623321535
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,16,31,0.018984000384807586
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,32,31,0.019019199907779692
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,64,31,0.019054399430751802
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,64,63,0.01250240057706833
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,4,1,1,63,0.01372160017490387
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,2,63,0.013460800051689148
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,4,63,0.012912000715732574
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,8,63,0.01276959925889969
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,16,63,0.012636800110340119
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,32,63,0.019108800590038298
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,32,63,0.012564800679683685
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,4,1,1,63,0.020183999836444855
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,2,63,0.019726400077342988
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,4,63,0.019385600090026857
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,8,63,0.01926079988479614
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,16,63,0.01910240054130554
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,64,63,0.019222399592399596
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,4,1,1,127,0.015246400237083435
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,2,127,0.015108799934387207
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,4,127,0.014561599493026734
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,8,127,0.01430879980325699
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,16,127,0.014299200475215912
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,32,127,0.01433439999818802
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,64,127,0.014398400485515595
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,4,1,1,127,0.02182080000638962
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,2,127,0.021590399742126464
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,4,127,0.021299199759960176
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,8,127,0.02110240012407303
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,16,127,0.02080159932374954
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,32,127,0.020865599811077117
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,64,127,0.020851199328899384
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,4,1,1,255,0.018387199938297273
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,2,255,0.018094399571418764
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,4,255,0.01767520010471344
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,8,255,0.017433600127696992
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,16,255,0.017423999309539796
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,32,255,0.01727679967880249
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,64,255,0.017350399494171144
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,4,1,1,255,0.025252801179885865
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,2,255,0.025167998671531678
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,4,255,0.024636800587177276
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,8,255,0.02444480061531067
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,16,255,0.02452960014343262
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,32,255,0.024240000545978545
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,64,255,0.02433599978685379
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,4,1,1,511,0.02038719952106476
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,2,511,0.01966399997472763
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,4,511,0.01839520037174225
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,8,511,0.01777759939432144
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,16,511,0.017267200350761413
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,32,511,0.018430399894714355
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,64,511,0.018515199422836304
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,4,1,1,511,0.029820799827575684
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,2,511,0.028932800889015196
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,4,511,0.027961599826812743
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,8,511,0.02690559923648834
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,16,511,0.02619200050830841
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,32,511,0.027353599667549133
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,64,511,0.027620801329612733
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,4,1,1,1023,0.02358720004558563
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,2,1023,0.020057600736618043
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,4,1023,0.018801599740982056
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,8,1023,0.017763200402259826
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,16,1023,0.017929600179195405
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,32,1023,0.01826400011777878
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,64,1023,0.018545599281787874
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,4,1,1,1023,0.0350959986448288
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,2,1023,0.0321263998746872
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,4,1023,0.030801600217819212
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,8,1023,0.029868799448013305
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,16,1023,0.02953439950942993
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,32,1023,0.03025279939174652
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,64,1023,0.030583998560905455
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,4,1,1,2047,0.02497439980506897
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,2,2047,0.023391999304294586
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,4,2047,0.02051679939031601
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,4,1,1,2047,0.04529919922351837
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,8,2047,0.01934719979763031
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,16,2047,0.019273599982261656
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,32,2047,0.019124799966812135
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,64,2047,0.019601599872112276
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,2,2047,0.04101920127868652
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,4,2047,0.03860960006713867
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,8,2047,0.03716799914836884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,16,2047,0.03696799874305725
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,32,2047,0.03733760118484497
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,64,2047,0.03743839859962463
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,4,1,1,4095,0.03030399978160858
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,2,4095,0.025377601385116577
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,4,4095,0.02351039946079254
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,8,4095,0.021928000450134277
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,4,4095,0.05674880146980286
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,16,4095,0.021401600539684297
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,32,4095,0.02280319929122925
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,64,4095,0.023137600719928743
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,4,1,1,4095,0.06694719791412354
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,2,4095,0.06410560011863708
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,8,4095,0.05460960268974304
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,32,4095,0.05270879864692688
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,16,4095,0.053288000822067264
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,64,4095,0.053681600093841556
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,4,1,1,8191,0.043049600720405576
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,2,8191,0.03774400055408478
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,4,8191,0.033585599064826964
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,8,8191,0.03044160008430481
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,16,8191,0.02781279981136322
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,32,8191,0.030132800340652466
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,64,8191,0.03055039942264557
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,4,1,1,8191,0.1045632004737854
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,2,8191,0.098471999168396
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,4,8191,0.09511200189590455
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,8,8191,0.09409760236740113
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,16,8191,0.09402559995651245
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,32,8191,0.09736639857292176
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,64,8191,0.09816960096359253
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,4,1,1,16383,0.05863999724388123
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,64,16383,0.050361597537994386
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,2,16383,0.05335680246353149
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,4,16383,0.0502128005027771
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,8,16383,0.048056000471115114
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,16,16383,0.04644800126552582
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,32,16383,0.0488431990146637
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,4,1,1,16383,0.17946079969406128
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,2,16383,0.17314720153808594
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,4,16383,0.16995359659194947
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,8,16383,0.1674896001815796
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,16,16383,0.16711679697036744
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,8,32767,0.07544800043106079
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,32,16383,0.17079360485076905
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,64,16383,0.17088799476623534
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,4,1,1,32767,0.08708320260047912
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,2,32767,0.08005279898643494
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,4,32767,0.07856000065803528
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,16,32767,0.07312639951705932
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,64,32767,0.07639520168304444
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,32,32767,0.07580639719963074
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,4,1,1,32767,0.3205728054046631
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,2,32767,0.3147151947021484
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,4,32767,0.31201601028442383
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,8,32767,0.30901598930358887
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,16,32767,0.3071808099746704
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,32,32767,0.3119119882583618
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,64,32767,0.31164801120758057
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,4,1,1,65535,0.143014395236969
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,2,65535,0.13434879779815673
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,4,65535,0.12807040214538573
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,8,65535,0.12627359628677368
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,16,65535,0.12357759475708008
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,32,65535,0.12699199914932252
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,64,65535,0.1273759961128235
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,4,1,1,65535,0.599187183380127
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,2,65535,0.5900015830993652
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,4,65535,0.5871488094329834
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,8,65535,0.582913589477539
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,16,65535,0.5811920166015625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,32,65535,0.5866352081298828
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,4,1,1,131071,0.2531791925430298
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,64,65535,0.58537278175354
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,4,1,2,131071,0.23099360466003419
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,4,1,16,131071,0.22624800205230713
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,4,1,4,131071,0.23100481033325196
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,4,1,8,131071,0.2282560110092163
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,4,1,32,131071,0.22786080837249756
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,4,1,64,131071,0.22809920310974122
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,4,1,1,131071,1.1547535896301269
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,4,1,2,131071,1.1373104095458983
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,4,1,4,131071,1.134648036956787
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,8,1,1,1,0.013742400705814362
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,4,1,8,131071,1.131377601623535
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,2,1,0.013340799510478974
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,4,1,16,131071,1.1304767608642579
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,4,1,32,131071,1.1347711563110352
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,4,1,0.012910400331020356
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,8,1,0.012910400331020356
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,16,1,0.012356799840927125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,32,1,0.012564800679683685
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,4,1,64,131071,1.129257583618164
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,64,1,0.012520000338554382
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,8,1,1,1,0.020342400670051573
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,2,1,0.020156799256801604
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,4,1,0.019499200582504272
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,8,1,0.019340799748897554
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,16,1,0.019395199418067933
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,32,1,0.01931679993867874
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,64,1,0.019324800372123717
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,8,1,1,3,0.014931200444698334
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,2,3,0.013193599879741669
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,4,3,0.012910400331020356
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,8,3,0.01260959953069687
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,16,3,0.01242239996790886
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,32,3,0.012539200484752655
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,64,3,0.012664000689983367
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,8,1,1,3,0.02067359983921051
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,2,3,0.020075200498104094
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,4,3,0.019174399971961974
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,8,3,0.019463999569416045
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,16,3,0.019166399538517
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,32,3,0.01931679993867874
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,64,3,0.019153599441051484
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,8,1,1,7,0.01395840048789978
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,2,7,0.013371199369430542
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,4,7,0.01284320056438446
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,8,7,0.012559999525547028
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,16,7,0.012555199861526489
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,32,7,0.012516799569129943
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,64,7,0.012459199875593185
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,8,1,1,7,0.02030239999294281
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,2,7,0.020003199577331543
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,4,7,0.019569599628448488
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,8,7,0.01945600062608719
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,16,7,0.019204799830913544
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,32,7,0.01926559954881668
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,64,7,0.019278399646282196
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,8,1,1,15,0.014830400049686433
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,2,15,0.013411200046539307
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,4,15,0.012894399464130402
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,8,15,0.012798400223255157
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,8,15,0.019412800669670105
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,16,15,0.012689599394798278
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,16,15,0.019230400025844575
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,32,15,0.012544000148773193
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,64,15,0.013068799674510957
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,8,1,1,15,0.020521600544452668
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,2,15,0.019871999323368073
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,8,31,0.012723200023174286
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,4,15,0.01961439996957779
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,16,31,0.01250080019235611
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,32,31,0.012628799676895142
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,32,15,0.019307200610637665
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,64,15,0.01929119974374771
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,8,1,1,31,0.013755199313163758
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,2,31,0.01340160071849823
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,4,31,0.012982399761676788
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,64,31,0.012708799540996551
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,8,1,1,31,0.021505600214004515
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,2,31,0.019950400292873382
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,4,31,0.019601599872112276
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,8,31,0.019444799423217772
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,4,63,0.012910400331020356
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,16,31,0.01923999935388565
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,32,31,0.019203199446201323
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,64,31,0.01926079988479614
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,8,1,1,63,0.013926400244235993
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,2,63,0.013545599579811097
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,8,63,0.012780800461769104
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,16,63,0.0127920001745224
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,32,63,0.012647999823093415
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,64,63,0.012699200212955475
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,8,1,1,63,0.02067999988794327
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,2,63,0.020212799310684204
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,4,63,0.01982560008764267
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,8,63,0.01979999989271164
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,16,63,0.019497600197792054
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,32,63,0.019312000274658202
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,64,63,0.019523200392723084
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,8,1,1,127,0.016832000017166136
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,2,127,0.015083199739456177
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,4,127,0.01486240029335022
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,8,127,0.014558400213718414
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,16,127,0.014531199634075165
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,32,127,0.014281600713729858
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,64,127,0.014555199444293976
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,8,1,1,127,0.022703999280929567
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,2,127,0.022276799380779266
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,4,127,0.022015999257564544
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,8,127,0.021564799547195434
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,16,127,0.02175839990377426
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,32,127,0.021427200734615327
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,64,127,0.021619200706481934
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,8,1,1,255,0.01865919977426529
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,2,255,0.01812320053577423
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,4,255,0.01790879964828491
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,8,255,0.01762080043554306
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,16,255,0.017615999281406402
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,32,255,0.0174575999379158
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,64,255,0.017654399573802947
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,8,1,1,255,0.027806401252746582
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,2,255,0.0268640011548996
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,4,255,0.026679998636245726
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,8,255,0.026470398902893065
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,16,255,0.026590400934219362
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,32,255,0.02627359926700592
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,64,255,0.026807999610900878
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,8,1,1,511,0.02290560007095337
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,2,511,0.02008160054683685
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,4,511,0.01863519996404648
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,8,511,0.01802240014076233
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,16,511,0.01794240027666092
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,32,511,0.018615999817848207
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,64,511,0.018980799615383147
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,8,1,1,511,0.03484640121459961
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,2,511,0.03239200115203857
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,4,511,0.0307343989610672
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,8,511,0.030118399858474733
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,16,511,0.029713600873947144
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,32,511,0.030928000807762146
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,64,511,0.03096640110015869
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,8,1,1,1023,0.025619199872016905
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,2,1023,0.022654399275779724
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,4,1023,0.020019200444221497
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,8,1023,0.018804800510406495
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,16,1023,0.01826079934835434
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,32,1023,0.019529600441455842
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,64,1023,0.019377599656581878
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,8,1,1,1023,0.046228799223899844
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,2,1023,0.041198399662971494
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,4,1023,0.03835839927196503
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,8,1023,0.03715839982032776
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,16,1023,0.0364111989736557
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,32,1023,0.03768480122089386
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,64,1023,0.03753120005130768
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,8,1,1,2047,0.0318015992641449
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,2,2047,0.02637920081615448
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,8,2047,0.021172800660133363
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,32,2047,0.020848000049591066
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,16,2047,0.020790399610996248
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,4,2047,0.022678400576114654
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,64,2047,0.021692800521850585
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,4,2047,0.056704002618789676
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,2,2047,0.06374559998512268
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,8,1,1,2047,0.06741440296173096
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,8,2047,0.05451679825782776
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,16,2047,0.05403680205345154
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,32,2047,0.051951998472213747
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,64,2047,0.05461919903755188
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,8,1,1,4095,0.041975998878479005
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,2,4095,0.03782080113887787
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,4,4095,0.03180800080299377
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,8,4095,0.030131199955940248
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,16,4095,0.02571359872817993
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,32,4095,0.027695998549461365
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,64,4095,0.027163198590278624
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,8,1,1,4095,0.10535839796066285
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,4,4095,0.09446560144424439
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,2,4095,0.09832479953765869
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,8,4095,0.09375680088996888
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,16,4095,0.09285280108451843
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,32,4095,0.09495999813079833
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,64,4095,0.09509919881820679
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,8,1,1,8191,0.05769439935684204
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,2,8191,0.05427039861679077
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,4,8191,0.04922559857368469
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,8,8191,0.04650560021400452
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,16,8191,0.04555999934673309
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,32,8191,0.050100797414779664
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,64,8191,0.049604800343513486
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,8,1,1,8191,0.17896319627761842
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,2,8191,0.17290879487991334
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,4,8191,0.16821919679641723
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,8,8191,0.167247998714447
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,16,8191,0.16587040424346924
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,32,8191,0.16979039907455445
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,64,8191,0.17064640522003174
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,8,1,1,16383,0.08564959764480591
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,2,16383,0.08001279830932617
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,4,16383,0.07656159996986389
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,8,16383,0.07238720059394836
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,16,16383,0.07272160053253174
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,32,16383,0.07547199726104736
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,64,16383,0.07647039890289306
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,8,1,1,16383,0.31955039501190186
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,16,16383,0.3067712068557739
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,2,16383,0.3149087905883789
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,8,1,1,32767,0.14034719467163087
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,4,16383,0.3092736005783081
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,8,16383,0.3082832098007202
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,32,16383,0.309881591796875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,64,16383,0.3109215974807739
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,2,32767,0.134060800075531
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,4,32767,0.1271664023399353
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,8,32767,0.12185120582580566
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,16,32767,0.12239840030670165
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,32,32767,0.12664320468902587
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,64,32767,0.12677119970321654
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,8,1,1,32767,0.5954239845275879
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,2,32767,0.5888895988464355
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,4,32767,0.5852079868316651
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,8,32767,0.5825551986694336
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,16,32767,0.5812848091125489
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,32,32767,0.5867839813232422
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,64,32767,0.585200023651123
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,8,1,1,65535,0.24698240756988527
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,2,65535,0.2345423936843872
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,4,65535,0.22910239696502685
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,8,65535,0.22619519233703614
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,16,65535,0.2216655969619751
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,32,65535,0.2281167984008789
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,64,65535,0.22808640003204345
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,2,65535,1.1372447967529298
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,8,1,1,65535,1.1491328239440919
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,4,65535,1.1326767921447753
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,8,65535,1.1323583602905274
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,8,1,1,131071,0.463651180267334
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,16,65535,1.1295663833618164
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,32,65535,1.1327584266662598
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,64,65535,1.1330271720886231
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,8,1,2,131071,0.4365119934082031
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,8,1,4,131071,0.43146719932556155
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,8,1,8,131071,0.4295152187347412
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,8,1,32,131071,0.43181438446044923
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,8,1,16,131071,0.4273536205291748
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,8,1,64,131071,0.4317967891693115
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,16,1,1,1,0.015406399965286255
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,2,1,0.013489599525928497
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,4,1,0.013099199533462525
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,8,1,0.012691199779510498
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,16,1,0.012740799784660339
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,8,1,4,131071,2.2200895309448243
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,8,1,8,131071,2.2225679397583007
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,8,1,1,131071,2.252022361755371
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,8,1,16,131071,2.2149887084960938
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,32,1,0.012751999497413635
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,8,1,2,131071,2.2316783905029296
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,8,1,32,131071,2.2102943420410157
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,64,1,0.01281760036945343
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,16,1,1,1,0.02174399942159653
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,2,1,0.02006399929523468
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,8,1,64,131071,2.2222784042358397
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,8,1,0.01935359984636307
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,4,1,0.019497600197792054
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,16,1,0.019251200556755065
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,32,1,0.019105599820613862
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,64,1,0.0191551998257637
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,16,1,1,3,0.014924800395965577
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,2,3,0.013612799346446991
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,4,3,0.013097600638866424
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,8,3,0.012761600315570831
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,16,3,0.012807999551296235
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,32,3,0.012622399628162384
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,64,3,0.01276639997959137
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,16,1,1,3,0.021414400637149812
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,2,3,0.019870400428771973
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,4,3,0.019566400349140166
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,8,3,0.019310399889945984
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,16,3,0.01929280012845993
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,32,3,0.019047999382019044
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,64,3,0.019278399646282196
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,16,1,1,7,0.015497599542140961
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,2,7,0.013652800023555756
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,4,7,0.012918399274349212
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,8,7,0.01268479973077774
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,16,7,0.012780800461769104
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,32,7,0.012768000364303589
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,64,7,0.01276959925889969
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,16,1,1,7,0.0216511994600296
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,2,7,0.019886399805545806
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,4,7,0.019406400620937347
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,8,7,0.019331200420856474
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,16,7,0.01920959949493408
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,32,7,0.01927199959754944
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,64,7,0.019247999787330626
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,16,1,1,15,0.015582400560379028
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,2,15,0.013630400598049163
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,4,15,0.01305759996175766
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,8,15,0.012683199346065521
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,16,15,0.01281760036945343
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,32,15,0.012966400384902954
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,64,15,0.012912000715732574
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,16,1,1,15,0.022254399955272675
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,2,15,0.02022400051355362
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,4,15,0.019651199877262115
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,8,15,0.019403199851512908
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,16,15,0.019299200177192687
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,32,15,0.019190399348735808
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,64,15,0.01934719979763031
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,16,1,1,31,0.015324799716472626
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,2,31,0.013468800485134125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,4,31,0.013329599797725678
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,8,31,0.012759999930858612
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,16,31,0.012822400033473968
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,32,31,0.012758399546146392
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,64,31,0.01281919926404953
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,16,1,1,31,0.022302399575710296
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,2,31,0.0201664000749588
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,4,31,0.02003040015697479
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,8,31,0.01963520050048828
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,16,31,0.019403199851512908
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,32,31,0.01955839991569519
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,64,31,0.019222399592399596
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,16,1,1,63,0.015607999265193939
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,2,63,0.013799999654293061
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,4,63,0.013342399895191193
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,8,63,0.012998400628566742
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,16,63,0.012960000336170197
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,32,63,0.013177600502967835
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,64,63,0.012939199805259705
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,16,1,1,63,0.02221599966287613
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,2,63,0.020641599595546723
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,4,63,0.020179200172424316
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,8,63,0.020465600490570068
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,32,63,0.01997919976711273
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,16,63,0.019862399995326997
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,64,63,0.01991039961576462
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,16,1,1,127,0.017310400307178498
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,2,127,0.015350399911403656
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,4,127,0.015041600167751312
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,8,127,0.014772799611091614
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,16,127,0.014790399372577668
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,32,127,0.014481599628925323
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,64,127,0.014510400593280792
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,16,1,1,127,0.025561600923538208
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,2,127,0.024233600497245787
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,4,127,0.023659199476242065
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,8,127,0.02343199998140335
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,16,127,0.023259200155735016
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,32,127,0.023281599581241607
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,64,127,0.023345600068569183
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,16,1,1,255,0.020244799554347992
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,2,255,0.018739199638366698
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,4,255,0.018004800379276275
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,8,255,0.017871999740600587
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,16,255,0.017867200076580048
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,32,255,0.017679999768733978
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,64,255,0.01785759925842285
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,16,1,1,255,0.031198400259017944
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,2,255,0.030144000053405763
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,4,255,0.02957119941711426
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,8,255,0.029129600524902342
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,16,255,0.029223999381065367
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,32,255,0.029228800535202028
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,64,255,0.028870400786399842
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,64,511,0.01945279985666275
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,16,1,1,511,0.028174400329589844
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,2,511,0.02377600073814392
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,4,511,0.020340800285339355
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,8,511,0.018878400325775146
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,16,511,0.018335999548435213
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,32,511,0.019785599410533906
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,16,1,1,511,0.04811840057373047
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,2,511,0.04128159880638123
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,4,511,0.038124799728393555
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,8,511,0.036776000261306764
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,16,511,0.03655200004577637
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,32,511,0.03769280016422272
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,64,511,0.03735840022563934
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,16,1,1,1023,0.03492000102996826
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,2,1023,0.026185598969459534
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,4,1023,0.022307200729846953
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,16,1,1,1023,0.06936320066452026
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,8,1023,0.02091519981622696
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,16,1023,0.020427200198173522
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,32,1023,0.021078400313854218
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,64,1023,0.021435199677944182
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,2,1023,0.06330239772796631
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,4,1023,0.056974399089813235
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,8,1023,0.052478402853012085
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,16,1023,0.054731202125549314
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,32,1023,0.05310720205307007
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,64,1023,0.05260000228881836
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,16,1,1,2047,0.04396319985389709
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,2,2047,0.03861280083656311
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,4,2047,0.0334991991519928
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,8,2047,0.027816000580787658
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,16,2047,0.02542079985141754
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,2,2047,0.09909440279006958
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,32,2047,0.025567999482154845
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,64,2047,0.025649601221084596
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,16,1,1,2047,0.1074895977973938
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,4,2047,0.09508640170097352
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,8,2047,0.09312639832496643
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,16,2047,0.09281439781188965
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,4,4095,0.047870400547981265
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,32,2047,0.09359200000762939
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,64,2047,0.09375039935111999
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,16,1,1,4095,0.059617602825164796
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,2,4095,0.05314559936523437
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,8,4095,0.04526079893112182
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,16,4095,0.04434559941291809
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,32,4095,0.04596480131149292
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,64,4095,0.04639039933681488
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,16,1,1,4095,0.18088639974594117
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,2,4095,0.17229280471801758
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,4,4095,0.1673632025718689
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,8,4095,0.16540319919586183
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,16,4095,0.16437920331954955
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,32,4095,0.16591839790344237
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,64,4095,0.16703840494155883
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,16,1,1,8191,0.08780800104141236
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,2,8191,0.08036479949951172
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,4,8191,0.07400959730148315
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,8,8191,0.07099199891090394
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,16,8191,0.06930559873580933
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,32,8191,0.07417119741439819
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,64,8191,0.07458720207214356
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,16,1,1,8191,0.3207551956176758
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,2,8191,0.3113951921463013
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,4,8191,0.3074592113494873
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,8,8191,0.3055056095123291
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,16,8191,0.30430400371551514
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,32,8191,0.30869920253753663
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,64,8191,0.3096303939819336
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,32,16383,0.12353760004043579
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,16,1,1,16383,0.1411504030227661
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,2,16383,0.13065600395202637
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,4,16383,0.1239408016204834
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,8,16383,0.1211840033531189
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,16,16383,0.12009919881820678
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,64,16383,0.12520159482955934
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,4,16383,0.580944013595581
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,16,1,1,16383,0.5977759838104248
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,2,16383,0.5861199855804443
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,8,16383,0.5783823966979981
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,16,16383,0.5779104232788086
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,32,16383,0.5825007915496826
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,16,1,1,32767,0.2476191997528076
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,2,32767,0.23022880554199218
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,64,16383,0.584065580368042
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,4,32767,0.22400639057159424
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,8,32767,0.22028639316558837
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,16,32767,0.2197567939758301
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,32,32767,0.2226815938949585
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,64,32767,0.22397921085357667
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,16,1,1,32767,1.1473199844360351
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,2,32767,1.1325263977050781
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,4,32767,1.1278063774108886
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,8,32767,1.124561595916748
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,16,32767,1.1237055778503418
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,16,1,1,65535,0.47228641510009767
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,32,32767,1.126318359375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,2,65535,0.42990880012512206
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,64,32767,1.1287327766418458
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,4,65535,0.42403039932250974
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,8,65535,0.4205904006958008
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,16,65535,0.41913437843322754
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,64,65535,0.4218319892883301
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,32,65535,0.4220736026763916
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,16,1,1,131071,0.9110079765319824
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,16,1,1,65535,2.2553216934204103
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,2,65535,2.2163999557495115
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,4,65535,2.2194480895996094
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,8,65535,2.208299255371094
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,16,65535,2.213710403442383
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,32,65535,2.211057662963867
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,64,65535,2.2102752685546876
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,16,1,2,131071,0.8271056175231933
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,16,1,4,131071,0.8186592102050781
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,16,1,8,131071,0.8159119606018066
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,16,1,16,131071,0.8141759872436524
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,16,1,32,131071,0.8168512344360351
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,16,1,64,131071,0.8182736396789551
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,16,1,1,131071,4.431739044189453
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,16,1,2,131071,4.368081665039062
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,16,1,4,131071,4.3544574737548825
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,32,1,1,1,0.01569119989871979
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,16,1,8,131071,4.413260650634766
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,2,1,0.015000000596046448
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,16,1,16,131071,4.367515182495117
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,16,1,32,131071,4.381166458129883
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,8,1,0.01361600011587143
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,4,1,0.013758400082588195
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,16,1,64,131071,4.401387023925781
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,16,1,0.013355199992656708
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,4,1,0.01988479942083359
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,32,1,0.013344000279903411
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,64,1,0.01310880035161972
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,2,1,0.021110400557518005
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,32,1,1,1,0.0223471999168396
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,32,1,1,3,0.015547199547290802
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,16,1,0.019776000082492827
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,8,1,0.02014400064945221
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,32,1,0.01958719938993454
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,64,1,0.020046399533748628
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,2,3,0.015091200172901154
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,4,3,0.013655999302864074
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,16,3,0.01327040046453476
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,8,3,0.01343040019273758
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,32,3,0.01334560066461563
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,64,3,0.013612799346446991
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,32,1,1,3,0.022177599370479584
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,2,3,0.021712000668048858
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,4,3,0.019964799284934998
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,8,3,0.019883200526237488
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,16,3,0.019920000433921815
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,32,3,0.01990240067243576
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,64,3,0.019883200526237488
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,32,1,1,7,0.016443200409412384
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,2,7,0.015035200119018554
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,4,7,0.013542400300502777
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,8,7,0.013631999492645264
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,16,7,0.01326880007982254
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,32,7,0.013233600556850434
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,64,7,0.013286399841308593
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,32,7,0.02001280039548874
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,32,1,1,7,0.023171199858188628
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,2,7,0.021512000262737273
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,4,7,0.02016319930553436
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,8,7,0.020108799636363982
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,16,7,0.02009759992361069
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,64,7,0.019675199687480927
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,64,15,0.013592000305652618
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,32,1,1,15,0.0160288006067276
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,32,1,1,15,0.022307200729846953
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,2,15,0.015398399531841278
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,4,15,0.01420000046491623
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,8,15,0.013903999328613281
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,16,15,0.013387200236320496
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,32,15,0.0134320005774498
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,2,15,0.022038400173187256
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,4,15,0.020662400126457214
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,8,15,0.020147199928760528
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,16,15,0.020164799690246583
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,32,15,0.02017119973897934
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,64,15,0.02006240040063858
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,32,1,1,31,0.015390400588512421
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,2,31,0.01510079950094223
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,4,31,0.014105600118637086
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,8,31,0.013736000657081604
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,16,31,0.013465599715709686
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,32,31,0.013438400626182557
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,64,31,0.013582399487495423
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,32,1,1,31,0.02245599925518036
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,2,31,0.02211039960384369
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,4,31,0.020824000239372253
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,8,31,0.02114879935979843
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,16,31,0.020580799877643587
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,32,31,0.020291200280189513
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,64,31,0.02046400010585785
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,32,1,1,63,0.01592639982700348
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,2,63,0.01611039936542511
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,4,63,0.014420799911022186
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,8,63,0.013972799479961395
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,16,63,0.014182400703430176
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,32,63,0.014268800616264343
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,64,63,0.013846400380134582
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,32,1,1,63,0.025155198574066163
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,2,63,0.024241599440574645
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,4,63,0.02337439954280853
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,8,63,0.022728000581264497
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,16,63,0.023004800081253052
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,32,63,0.02303680032491684
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,64,63,0.023003199696540834
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,32,1,1,127,0.01791519969701767
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,2,127,0.017377600073814392
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,4,127,0.016062399744987486
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,8,127,0.015992000699043274
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,16,127,0.015624000132083893
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,32,127,0.015651200711727143
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,64,127,0.0160288006067276
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,32,1,1,127,0.029265600442886352
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,2,127,0.028758400678634645
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,4,127,0.027393600344657897
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,8,127,0.02731359899044037
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,16,127,0.02709439992904663
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,32,127,0.027272000908851624
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,64,127,0.027344000339508057
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,32,1,1,255,0.020790399610996248
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,2,255,0.02048799991607666
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,4,255,0.01942880004644394
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,8,255,0.01897439956665039
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,16,255,0.018779200315475465
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,32,255,0.01889120042324066
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,64,255,0.019182400405406953
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,32,1,1,255,0.03859359920024872
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,2,255,0.03752799928188324
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,4,255,0.036139199137687684
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,8,255,0.03617599904537201
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,16,255,0.03586399853229523
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,32,255,0.03610559999942779
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,64,255,0.03639039993286133
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,32,1,1,511,0.03911199867725372
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,2,511,0.029470399022102356
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,4,511,0.023364800214767455
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,8,511,0.021566399931907655
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,16,511,0.020852799713611602
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,32,511,0.021784000098705292
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,64,511,0.021937599778175353
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,32,1,1,511,0.07306240200996399
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,2,511,0.06616320013999939
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,4,511,0.05846080183982849
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,8,511,0.0552079975605011
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,16,511,0.05278080105781555
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,32,511,0.05279359817504883
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,64,511,0.056145602464675905
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,32,1,1,1023,0.04752799868583679
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,2,1023,0.04173280000686645
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,4,1023,0.034596800804138184
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,32,1,1,1023,0.11126240491867065
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,8,1023,0.030511999130249025
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,16,1023,0.024937599897384644
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,32,1023,0.02612000107765198
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,64,1023,0.026643198728561402
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,2,1023,0.1014415979385376
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,4,1023,0.09511680006980897
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,8,1023,0.09156479835510253
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,16,1023,0.09228960275650025
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,32,1023,0.09327520132064819
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,64,1023,0.09339200258255005
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,32,1,1,2047,0.06380000114440917
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,2,2047,0.057411199808120726
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,4,2047,0.049446401000022885
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,8,2047,0.046481600403785704
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,16,2047,0.04504159986972809
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,32,2047,0.04535999894142151
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,64,2047,0.045664000511169436
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,32,1,1,2047,0.18422880172729492
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,2,2047,0.17539360523223876
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,4,2047,0.16859519481658936
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,8,2047,0.16601760387420655
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,16,2047,0.16532160043716432
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,32,2047,0.16637760400772095
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,64,2047,0.16607040166854858
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,32,1,1,4095,0.09317119717597962
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,2,4095,0.08289759755134582
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,4,4095,0.07509599924087525
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,8,4095,0.07217599749565125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,16,4095,0.06997759938240052
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,32,4095,0.0717519998550415
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,64,4095,0.07229440212249756
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,16,4095,0.30508320331573485
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,32,1,1,4095,0.3247904062271118
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,2,4095,0.314465594291687
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,4,4095,0.3090831995010376
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,8,4095,0.30668001174926757
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,32,4095,0.3069744110107422
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,64,4095,0.3075216054916382
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,32,1,1,8191,0.14673279523849486
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,2,8191,0.13287999629974365
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,4,8191,0.12525440454483033
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,8,8191,0.12217600345611572
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,16,8191,0.11975040435791015
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,32,8191,0.12483839988708496
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,64,8191,0.1248960018157959
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,2,8191,0.5909247875213623
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,4,8191,0.5825232028961181
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,32,1,1,8191,0.6021488189697266
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,8,8191,0.57991042137146
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,16,8191,0.5779615879058838
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,32,8191,0.5849055767059326
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,64,8191,0.5863167762756347
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,8,16383,0.2214656114578247
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,32,1,1,16383,0.25213921070098877
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,4,16383,0.22550559043884277
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,2,16383,0.23365440368652343
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,16,16383,0.2193455934524536
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,32,16383,0.22379040718078613
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,64,16383,0.22395999431610109
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,32,1,1,16383,1.1583791732788087
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,2,16383,1.1348480224609374
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,4,16383,1.128390407562256
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,8,16383,1.1254063606262208
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,16,16383,1.1241344451904296
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,32,16383,1.1274880409240722
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,32,1,1,32767,0.4771584033966064
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,2,32767,0.4326943874359131
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,64,16383,1.1331199645996093
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,4,32767,0.42443199157714845
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,8,32767,0.42165279388427734
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,16,32767,0.4184703826904297
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,32,32767,0.42259039878845217
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,64,32767,0.4227871894836426
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,32,1,1,32767,2.2509584426879883
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,2,32767,2.225366401672363
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,4,32767,2.219905662536621
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,8,32767,2.2174560546875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,16,32767,2.2071327209472655
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,32,32767,2.216548728942871
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,64,32767,2.2282880783081054
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,32,1,1,65535,0.912441635131836
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,32,1,2,65535,0.8288288116455078
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,32,1,4,65535,0.8200896263122559
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,32,1,8,65535,0.8166591644287109
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,32,1,16,65535,0.8171600341796875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,32,1,32,65535,0.8186623573303222
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,32,1,64,65535,0.8185711860656738
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,64,1,1,1,0.01712159961462021
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,2,1,0.01618880033493042
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,4,1,0.014791999757289887
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,8,1,0.014558400213718414
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,16,1,0.01422239989042282
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,32,1,0.014239999651908874
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,64,1,0.014230400323867798
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,64,1,1,1,0.024255999922752382
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,2,1,0.023060800135135652
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,4,1,0.021247999370098115
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,8,1,0.020916800200939178
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,16,1,0.020713600516319274
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,32,1,0.020761600136756896
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,64,1,0.020751999318599702
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,64,1,1,3,0.017148800194263458
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,32,1,1,65535,4.507556915283203
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,32,1,4,65535,4.370100784301758
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,32,1,2,65535,4.392633438110352
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,32,1,8,65535,4.371503829956055
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,2,3,0.01632159948348999
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,32,1,16,65535,4.3831024169921875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,32,3,0.014019200205802917
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,32,1,32,65535,4.40449104309082
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,4,3,0.01478240042924881
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,32,1,64,65535,4.384183883666992
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,8,3,0.014134399592876434
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,16,3,0.014164799451828003
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,64,3,0.014238399267196656
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,64,1,1,3,0.024240000545978545
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,2,3,0.022308799624443054
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,4,3,0.02130240052938461
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,8,3,0.021297599375247955
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,16,3,0.021022400259971617
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,32,3,0.02061759978532791
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,64,3,0.020956799387931824
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,64,1,1,7,0.0171984001994133
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,2,7,0.016251200437545778
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,4,7,0.014595200121402741
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,8,7,0.014116799831390381
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,16,7,0.014596800506114959
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,32,7,0.013916799426078796
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,64,7,0.0143327996134758
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,64,1,1,7,0.023919999599456787
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,2,7,0.02295839935541153
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,4,7,0.021254399418830873
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,8,7,0.02067520022392273
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,16,7,0.020788800716400147
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,32,7,0.021212799847126006
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,64,7,0.021062399446964263
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,64,1,1,15,0.017283199727535246
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,2,15,0.01578879952430725
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,4,15,0.014979200065135955
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,8,15,0.014748799800872802
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,16,15,0.014206400513648987
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,32,15,0.01414559930562973
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,64,15,0.014894400537014008
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,64,1,1,15,0.024452799558639528
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,2,15,0.022935999929904936
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,4,15,0.02178879976272583
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,8,15,0.021697600185871125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,16,15,0.02147520035505295
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,32,15,0.02110559940338135
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,64,15,0.021595199406147004
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,64,1,1,31,0.01746560037136078
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,2,31,0.015958400070667268
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,4,31,0.015041600167751312
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,8,31,0.014353600144386292
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,16,31,0.014406399428844452
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,32,31,0.014734399318695069
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,64,31,0.014430400729179383
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,64,1,1,31,0.02632000148296356
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,2,31,0.025011199712753295
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,4,31,0.023800000548362732
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,8,31,0.02344159930944443
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,16,31,0.023049600422382355
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,32,31,0.0232464000582695
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,64,31,0.02327679991722107
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,64,1,1,63,0.01736160069704056
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,2,63,0.01661919951438904
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,4,63,0.016008000075817107
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,8,63,0.014851200580596923
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,16,63,0.014679999649524688
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,32,63,0.01465120017528534
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,64,63,0.014732800424098969
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,64,1,1,63,0.029529601335525513
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,2,63,0.028278398513793945
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,4,63,0.027188798785209654
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,8,63,0.02683520019054413
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,16,63,0.026398399472236635
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,32,63,0.026630398631095887
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,64,63,0.026655998826026917
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,64,1,1,127,0.019512000679969787
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,2,127,0.019659200310707094
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,4,127,0.017321600019931792
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,8,127,0.017182399332523347
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,16,127,0.016833600401878358
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,32,127,0.01703840047121048
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,64,127,0.01700959950685501
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,64,1,1,127,0.03868640065193176
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,2,127,0.036580801010131836
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,4,127,0.03474400043487549
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,8,127,0.03424319922924042
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,4,255,0.020428800582885744
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,16,127,0.033766400814056394
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,8,255,0.020147199928760528
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,32,127,0.033897599577903746
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,32,255,0.019823999702930452
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,64,255,0.02024960070848465
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,64,127,0.03407999873161316
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,64,1,1,255,0.023423999547958374
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,2,255,0.021963199973106383
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,16,255,0.020454399287700653
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,64,1,1,255,0.06049280166625977
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,2,255,0.055852800607681274
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,4,255,0.05254080295562744
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,8,255,0.05071200132369995
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,16,255,0.05113919973373413
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,32,255,0.05154880285263062
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,64,255,0.05029600262641907
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,64,1,1,511,0.041193601489067075
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,2,511,0.04559360146522522
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,4,511,0.03694080114364624
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,8,511,0.03183520138263703
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,16,511,0.025860801339149475
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,32,511,0.03164480030536652
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,64,511,0.027249601483345032
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,64,1,1,511,0.10531840324401856
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,2,511,0.10689120292663574
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,4,511,0.09721919894218445
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,8,511,0.09454879760742188
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,16,511,0.0927519977092743
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,32,511,0.0948639988899231
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,64,511,0.09426400065422058
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,64,1,1,1023,0.054971200227737424
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,2,1023,0.060145598649978635
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,4,1023,0.051520001888275144
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,8,1023,0.047075200080871585
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,16,1023,0.04552319943904877
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,32,1023,0.04631359875202179
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,64,1023,0.0472815990447998
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,2,1023,0.17907520532608032
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,64,1,1,1023,0.1766111969947815
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,4,1023,0.1718608021736145
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,8,1023,0.16825759410858154
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,16,1023,0.16516480445861817
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,32,1023,0.16719199419021608
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,64,1023,0.16843520402908324
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,64,1,1,2047,0.08294559717178344
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,2,2047,0.08670880198478699
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,4,2047,0.07762240171432495
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,8,2047,0.07296640276908875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,64,1,1,2047,0.3189183950424194
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,16,2047,0.07048320174217224
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,32,2047,0.07215999960899352
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,64,2047,0.07252320051193237
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,2,2047,0.31896319389343264
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,4,2047,0.3116447925567627
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,8,2047,0.30657119750976564
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,16,2047,0.3067215919494629
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,32,2047,0.30720160007476804
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,64,2047,0.30846879482269285
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,64,1,1,4095,0.13704479932785035
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,64,4095,0.12427200078964233
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,2,4095,0.1375615954399109
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,4,4095,0.1275231957435608
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,8,4095,0.12265919446945191
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,16,4095,0.1203536033630371
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,64,1,1,4095,0.5974256038665772
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,32,4095,0.12311840057373047
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,4,4095,0.5858784198760987
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,2,4095,0.592139196395874
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,8,4095,0.5825984001159668
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,16,4095,0.5808544158935547
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,32,4095,0.5836063861846924
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,64,4095,0.5834303855895996
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,64,1,1,8191,0.24644958972930908
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,2,8191,0.2384592056274414
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,4,8191,0.22781760692596437
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,8,8191,0.22181119918823242
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,16,8191,0.2198319911956787
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,32,8191,0.22476959228515625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,64,8191,0.22956318855285646
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,64,1,1,8191,1.1493023872375487
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,2,8191,1.1404191970825195
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,4,8191,1.130679988861084
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,8,8191,1.1305264472961425
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,16,8191,1.1286831855773927
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,32,8191,1.135587215423584
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,64,8191,1.1330544471740722
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,64,1,1,16383,0.46844801902770994
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,4,16383,0.4263984203338623
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,2,16383,0.4407648086547852
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,8,16383,0.4211535930633545
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,16,16383,0.4188416004180908
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,32,16383,0.42502717971801757
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,64,16383,0.42809600830078126
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,64,1,1,16383,2.247841644287109
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,2,16383,2.224542427062988
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,4,16383,2.2309343338012697
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,8,16383,2.2174448013305663
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,16,16383,2.2080223083496096
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,32,16383,2.2295040130615233
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,64,16383,2.22116641998291
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,64,1,1,32767,0.9158767700195313
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,64,1,2,32767,0.8327407836914062
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,64,1,4,32767,0.8226880073547364
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,64,1,8,32767,0.8179072380065918
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,64,1,16,32767,0.8146384239196778
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,64,1,64,32767,0.8258255958557129
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,64,1,32,32767,0.8208448410034179
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,128,1,1,1,0.029172798991203307
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,2,1,0.01796479970216751
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,4,1,0.016012799739837647
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,8,1,0.016312000155448914
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,16,1,0.015967999398708344
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,32,1,0.016279999911785126
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,64,1,0.015859200060367583
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,128,1,1,1,0.03544479906558991
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,2,1,0.02449599951505661
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,4,1,0.0228752002120018
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,8,1,0.02289759963750839
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,16,1,0.022991999983787537
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,64,1,1,32767,4.485889434814453
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,32,1,0.02285439968109131
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,64,1,2,32767,4.397143936157226
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,64,1,4,32767,4.397496032714844
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,64,1,8,32767,4.3729103088378904
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,128,1,1,3,0.028201600909233092
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,64,1,0.022891199588775633
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,64,1,64,32767,4.365975952148437
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,64,1,16,32767,4.382819366455078
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,64,1,32,32767,4.419140625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,2,3,0.018223999440670012
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,4,3,0.016118399798870087
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,16,3,0.01621599942445755
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,32,3,0.016047999262809753
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,8,3,0.015667200088500977
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,64,3,0.015483200550079346
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,128,1,1,3,0.038324800133705136
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,4,3,0.022486400604248048
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,2,3,0.02468319982290268
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,8,3,0.022518399357795715
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,16,3,0.022460800409317017
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,32,3,0.022572800517082214
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,64,3,0.02266079932451248
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,128,1,1,7,0.028502398729324342
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,2,7,0.017688000202178956
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,4,7,0.016323199868202208
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,8,7,0.015614399313926696
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,16,7,0.01624799966812134
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,16,7,0.023175999522209167
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,32,7,0.01626559942960739
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,64,7,0.01578560024499893
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,128,1,1,7,0.03752320110797882
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,2,7,0.024875199794769286
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,4,7,0.023153600096702576
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,8,7,0.022924800217151643
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,32,7,0.0231904000043869
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,64,7,0.02271360009908676
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,64,15,0.01581120043992996
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,128,1,1,15,0.029252800345420837
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,128,1,1,15,0.04179520010948181
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,2,15,0.018012799322605133
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,4,15,0.016040000319480895
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,8,15,0.01636800020933151
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,16,15,0.015934400260448456
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,32,15,0.015929600596427916
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,2,15,0.026771199703216553
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,4,15,0.02531520128250122
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,8,15,0.025676798820495606
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,16,15,0.025339201092720032
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,32,15,0.025060799717903138
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,64,15,0.024873599410057068
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,128,1,1,31,0.030212798714637758
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,2,31,0.018083199858665466
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,4,31,0.01634719967842102
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,8,31,0.01613599956035614
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,16,31,0.016089600324630738
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,32,31,0.01659359931945801
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,64,31,0.01607840061187744
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,128,1,1,31,0.04443199932575226
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,2,31,0.030265599489212036
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,4,31,0.027953600883483885
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,8,31,0.02826879918575287
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,16,31,0.02799839973449707
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,32,31,0.02810240089893341
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,64,31,0.027475199103355406
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,128,1,1,63,0.03224479854106903
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,2,63,0.01918880045413971
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,4,63,0.01720159947872162
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,8,63,0.017236800491809846
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,16,63,0.017310400307178498
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,32,63,0.017158399522304534
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,64,63,0.016539199650287627
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,128,1,1,63,0.05223199725151062
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,2,63,0.03818880021572113
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,4,63,0.0347680002450943
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,8,63,0.03466239869594574
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,16,63,0.03476639986038208
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,4,127,0.019118399918079378
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,32,63,0.03434560000896454
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,64,63,0.03398880064487457
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,128,1,1,127,0.040361601114273074
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,2,127,0.0221903994679451
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,8,127,0.01937279999256134
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,16,127,0.019366399943828584
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,32,127,0.01918399930000305
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,64,127,0.01887039989233017
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,128,1,1,127,0.07228639721870422
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,32,127,0.050241601467132566
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,2,127,0.05910239815711975
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,4,127,0.05441120266914368
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,8,127,0.05146239995956421
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,16,127,0.05267680287361145
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,64,127,0.05004799962043762
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,128,1,1,255,0.04688799977302551
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,2,255,0.03476159870624542
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,64,255,0.023444800078868865
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,4,255,0.031124800443649292
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,8,255,0.02723039984703064
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,16,255,0.024094399809837342
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,32,255,0.02452960014343262
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,128,1,1,255,0.11039199829101562
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,2,255,0.0948032021522522
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,8,255,0.09040799736976624
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,4,255,0.09170399904251099
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,16,255,0.088510400056839
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,32,255,0.08943520188331604
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,64,255,0.08904640078544616
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,128,1,1,511,0.0711296021938324
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,2,511,0.05330079793930054
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,4,511,0.04815520048141479
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,8,511,0.04655199944972992
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,16,511,0.045614400506019594
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,32,511,0.043808001279830935
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,64,511,0.04446719884872437
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,128,1,1,511,0.19482719898223877
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,2,511,0.1733456015586853
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,4,511,0.16823999881744384
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,8,511,0.1669376015663147
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,16,511,0.1655392050743103
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,32,511,0.16561919450759888
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,64,511,0.1641744017601013
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,128,1,1,1023,0.10005600452423095
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,2,1023,0.07876319885253906
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,4,1023,0.07425919771194459
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,8,1023,0.07196320295333862
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,16,1023,0.06978880167007447
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,32,1023,0.07012320160865784
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,64,1023,0.06985920071601867
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,128,1,1,1023,0.33608479499816896
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,2,1023,0.31191999912261964
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,4,1023,0.3085599899291992
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,8,1023,0.30727999210357665
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,16,1023,0.3049776077270508
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,32,1023,0.3055903911590576
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,64,1023,0.3057935953140259
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,128,1,1,2047,0.15365920066833497
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,2,2047,0.1288383960723877
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,4,2047,0.12408000230789185
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,8,2047,0.1216096043586731
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,16,2047,0.12013759613037109
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,32,2047,0.12005120515823364
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,64,2047,0.1191823959350586
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,128,1,1,2047,0.6140096187591553
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,2,2047,0.5871823787689209
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,4,2047,0.5837791919708252
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,8,2047,0.5812479972839355
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,16,2047,0.5807919979095459
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,32,2047,0.5801136016845703
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,64,2047,0.5779952049255371
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,128,1,1,4095,0.2636143922805786
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,2,4095,0.2282047986984253
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,4,4095,0.2222383975982666
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,8,4095,0.22060320377349854
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,16,4095,0.21914238929748536
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,32,4095,0.21819519996643066
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,64,4095,0.21857759952545167
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,128,1,1,4095,1.1687984466552734
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,2,4095,1.134395217895508
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,4,4095,1.126310443878174
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,8,4095,1.1251631736755372
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,16,4095,1.1275103569030762
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,32,4095,1.1273759841918944
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,64,4095,1.1217951774597168
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,128,1,1,8191,0.4964591979980469
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,2,8191,0.4444511890411377
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,4,8191,0.42197279930114745
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,8,8191,0.41936640739440917
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,16,8191,0.4171152114868164
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,32,8191,0.4161839962005615
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,64,8191,0.41566882133483884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,128,1,1,8191,2.261323165893555
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,2,8191,2.22249755859375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,4,8191,2.2268655776977537
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,8,8191,2.2225519180297852
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,16,8191,2.2057743072509766
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,32,8191,2.2078847885131836
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,64,8191,2.2096960067749025
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,128,1,1,16383,0.9521743774414062
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,128,1,2,16383,0.8366175651550293
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,128,1,4,16383,0.8183600425720214
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,128,1,8,16383,0.8156543731689453
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,128,1,16,16383,0.812440013885498
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,128,1,32,16383,0.813212776184082
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,128,1,64,16383,0.8130000114440918
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,256,1,1,1,0.0556335985660553
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,2,1,0.02715519964694977
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,4,1,0.024087999761104584
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,8,1,0.023004800081253052
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,16,1,0.02285760045051575
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,32,1,0.02279839962720871
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,64,1,0.022628800570964815
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,256,1,1,1,0.061452800035476686
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,2,1,0.036345601081848145
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,128,1,1,16383,4.488393783569336
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,4,1,0.03083840012550354
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,16,1,0.029468798637390138
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,8,1,0.03038879930973053
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,32,1,0.02922239899635315
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,64,1,0.02977440059185028
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,128,1,2,16383,4.4358062744140625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,256,1,1,3,0.05286239981651306
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,128,1,4,16383,4.368439865112305
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,2,3,0.031092798709869383
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,128,1,8,16383,4.380195236206054
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,8,3,0.023321600258350374
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,128,1,16,16383,4.3848224639892575
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,4,3,0.023656000196933747
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,128,1,32,16383,4.378619384765625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,32,3,0.023108799755573273
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,128,1,64,16383,4.379742431640625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,16,3,0.022755199670791627
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,64,3,0.022686399519443512
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,256,1,1,3,0.0616815984249115
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,2,3,0.037887999415397645
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,256,1,1,7,0.05306079983711243
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,4,3,0.03134559988975525
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,4,7,0.023868800699710847
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,8,3,0.030888000130653383
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,16,3,0.030502399802207945
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,64,3,0.0298768013715744
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,32,3,0.030006399750709532
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,2,7,0.028519999980926514
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,8,7,0.0234592005610466
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,16,7,0.02317280024290085
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,32,7,0.02311040014028549
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,64,7,0.022939200699329376
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,256,1,1,7,0.06460800170898437
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,2,7,0.04074879884719849
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,4,7,0.03254719972610474
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,8,7,0.0321727991104126
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,16,7,0.032094401121139524
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,32,7,0.0322735995054245
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,64,7,0.03208000063896179
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,256,1,1,15,0.05343679785728454
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,2,15,0.02853280007839203
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,256,1,1,15,0.06710399985313416
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,4,15,0.023945599794387817
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,8,15,0.023444800078868865
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,16,15,0.02324160039424896
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,32,15,0.023060800135135652
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,64,15,0.022868800163269042
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,2,15,0.04503520131111145
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,256,1,1,31,0.054872000217437746
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,4,15,0.0364111989736557
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,8,15,0.034764799475669864
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,16,15,0.0350383996963501
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,32,15,0.03468799889087677
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,64,15,0.03459199965000152
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,2,31,0.03205919861793518
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,4,31,0.023996800184249878
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,8,31,0.023835200071334838
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,16,31,0.02356960028409958
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,32,31,0.023236800730228425
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,64,31,0.02337439954280853
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,256,1,1,31,0.07473120093345642
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,2,31,0.05341119766235351
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,4,31,0.04362240135669708
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,8,31,0.04116320013999939
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,16,31,0.04088160097599029
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,32,31,0.04071840047836304
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,64,31,0.04059840142726898
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,256,1,1,63,0.05833920240402222
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,2,63,0.037448000907897946
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,4,63,0.026660799980163574
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,8,63,0.025092801451683043
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,16,63,0.024275200068950654
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,32,63,0.024216000735759736
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,64,63,0.02406879961490631
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,256,1,1,63,0.09321920275688171
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,2,63,0.07042400240898132
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,4,63,0.06324480175971985
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,8,63,0.0558896005153656
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,16,63,0.056752002239227294
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,32,63,0.055644798278808597
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,64,63,0.055113601684570315
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,256,1,1,127,0.06701120138168334
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,2,127,0.04421280026435852
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,4,127,0.039241600036621097
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,8,127,0.03410240113735199
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,16,127,0.03283840119838714
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,32,127,0.02964639961719513
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,64,127,0.029862400889396668
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,16,127,0.09763839840888977
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,32,127,0.09683200120925903
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,256,1,1,127,0.130948805809021
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,2,127,0.10818719863891602
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,4,127,0.10220799446105958
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,8,127,0.0999791979789734
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,64,127,0.09624000191688538
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,256,1,1,255,0.08086720108985901
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,2,255,0.058508801460266116
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,64,255,0.048614400625228885
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,4,255,0.053427201509475705
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,8,255,0.04984799921512604
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,16,255,0.04934720098972321
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,32,255,0.04816640019416809
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,256,1,1,255,0.20202560424804689
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,2,255,0.1787392020225525
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,4,255,0.17402880191802977
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,8,255,0.17159039974212648
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,16,255,0.17027679681777955
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,32,255,0.16994240283966064
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,64,255,0.16935839653015136
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,256,1,1,511,0.1288831949234009
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,2,511,0.09478880167007446
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,4,511,0.08496000170707703
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,8,511,0.07985119819641114
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,16,511,0.0774944007396698
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,32,511,0.07661439776420594
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,64,511,0.0758288025856018
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,256,1,1,511,0.36193439960479734
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,2,511,0.3277776002883911
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,4,511,0.3211872100830078
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,8,511,0.3162735939025879
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,16,511,0.3149888038635254
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,32,511,0.31370880603790285
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,64,511,0.3140608072280884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,256,1,1,1023,0.1842847943305969
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,2,1023,0.14338560104370118
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,4,1023,0.1340448021888733
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,8,1023,0.1297104001045227
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,16,1023,0.1262671947479248
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,32,1023,0.12544000148773193
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,64,1023,0.12489759922027588
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,256,1,1,1023,0.6439119815826416
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,2,1023,0.6008063793182373
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,4,1023,0.5923295974731445
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,8,1023,0.5881984233856201
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,16,1023,0.5872464179992676
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,32,1023,0.5856224060058594
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,64,1023,0.5848000049591064
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,256,1,1,2047,0.29271199703216555
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,2,2047,0.2422719955444336
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,4,2047,0.2332240104675293
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,8,2047,0.22772159576416015
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,16,2047,0.2249135971069336
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,32,2047,0.22468481063842774
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,64,2047,0.22404959201812744
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,256,1,1,2047,1.1951968193054199
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,2,2047,1.1454048156738281
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,4,2047,1.136903953552246
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,8,2047,1.1328479766845703
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,16,2047,1.1306960105895996
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,32,2047,1.1304047584533692
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,256,1,1,4095,0.5312416076660156
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,64,2047,1.131868839263916
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,2,4095,0.46236958503723147
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,4,4095,0.4327807903289795
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,8,4095,0.42495040893554686
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,16,4095,0.4226799964904785
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,32,4095,0.4206511974334717
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,64,4095,0.4204592227935791
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,256,1,1,4095,2.314788818359375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,2,4095,2.2404495239257813
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,4,4095,2.2191232681274413
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,8,4095,2.230062484741211
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,16,4095,2.2110591888427735
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,32,4095,2.2142719268798827
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,64,4095,2.214147186279297
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,256,1,1,8191,1.0123135566711425
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,256,1,2,8191,0.8968720436096191
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,256,1,8,8191,0.8207119941711426
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,256,1,4,8191,0.8379263877868652
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,256,1,16,8191,0.8201760292053223
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,256,1,32,8191,0.8148431777954102
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,256,1,64,8191,0.8152591705322265
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,512,1,1,1,0.09387999773025513
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,2,1,0.05333600044250488
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,4,1,0.041494399309158325
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,256,1,2,8191,4.383107376098633
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,8,1,0.038385599851608276
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,256,1,4,8191,4.389238357543945
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,256,1,1,8191,4.550239944458008
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,256,1,8,8191,4.384907150268555
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,256,1,16,8191,4.373152160644532
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,256,1,32,8191,4.381273651123047
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,32,1,0.03718560039997101
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,16,1,0.03736799955368042
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,256,1,64,8191,4.381887817382813
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,64,1,0.03745119869709015
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,512,1,1,1,0.10736479759216308
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,2,1,0.06424000263214111
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,4,1,0.05198240280151367
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,16,1,0.045044800639152525
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,32,1,0.0441103994846344
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,8,1,0.04508320093154907
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,64,1,0.04392000138759613
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,512,1,1,3,0.09296640157699584
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,2,3,0.05491679906845093
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,4,3,0.04146719872951508
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,8,3,0.03824160099029541
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,16,3,0.037648001313209535
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,32,3,0.0369488000869751
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,64,3,0.03697119951248169
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,512,1,1,3,0.1034816026687622
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,2,3,0.06361759901046753
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,4,3,0.05474240183830261
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,8,3,0.04703199863433838
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,16,3,0.0466623991727829
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,32,3,0.04657759964466095
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,64,3,0.04646239876747131
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,512,1,1,7,0.09376800060272217
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,2,7,0.05479679703712463
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,4,7,0.04190720021724701
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,8,7,0.03873760104179382
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,16,7,0.037459200620651244
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,32,7,0.03699679970741272
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,64,7,0.037601599097251893
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,512,1,1,7,0.10839200019836426
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,2,7,0.0688207983970642
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,4,7,0.057436800003051756
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,8,7,0.05065600275993347
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,16,7,0.04972319900989532
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,32,7,0.04920639991760254
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,64,7,0.04885599911212921
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,512,1,1,15,0.09585279822349549
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,2,15,0.05538880228996277
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,4,15,0.042843198776245116
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,2,15,0.07418720126152038
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,8,15,0.03861440122127533
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,16,15,0.037483200430870056
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,32,15,0.03758400082588196
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,64,15,0.037755200266838075
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,512,1,1,15,0.1155135989189148
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,4,15,0.06589120030403137
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,8,15,0.05835679769515991
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,16,15,0.05551999807357788
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,32,15,0.05484799742698669
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,64,15,0.05483999848365784
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,512,1,1,31,0.09944000244140624
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,2,31,0.057265597581863406
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,512,1,1,31,0.13539040088653564
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,4,31,0.04622080028057098
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,8,31,0.03889600038528442
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,16,31,0.03769280016422272
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,32,31,0.03765600025653839
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,64,31,0.03764640092849732
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,2,31,0.09187679886817932
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,4,31,0.08135039806365967
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,8,31,0.07468000054359436
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,16,31,0.07199360132217407
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,32,31,0.07017279863357544
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,64,31,0.06870239973068237
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,512,1,1,63,0.10143519639968872
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,2,63,0.06403520107269287
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,4,63,0.052534401416778564
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,8,63,0.0480783998966217
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,16,63,0.04501279890537262
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,32,63,0.04312799870967865
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,64,63,0.04215039908885956
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,512,1,1,63,0.16676160097122192
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,2,63,0.128331196308136
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,4,63,0.11760319471359253
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,8,63,0.11256639957427979
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,16,63,0.10884000062942505
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,32,63,0.1074031949043274
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,64,63,0.10706880092620849
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,512,1,1,127,0.11540640592575073
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,2,127,0.0771727979183197
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,4,127,0.06664479970932007
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,8,127,0.06047999858856201
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,16,127,0.05826240181922913
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,32,127,0.05645279884338379
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,64,127,0.05584480166435242
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,512,1,1,127,0.23524959087371827
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,2,127,0.19780639410018921
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,4,127,0.18709280490875244
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,8,127,0.1814479947090149
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,16,127,0.17899359464645387
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,32,127,0.17806880474090575
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,64,127,0.17689759731292726
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,16,255,0.08451840281486511
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,512,1,1,255,0.14476480484008789
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,2,255,0.10327039957046509
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,4,255,0.09157119989395142
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,8,255,0.08686559796333312
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,32,255,0.08297280073165894
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,64,255,0.08178560137748718
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,512,1,1,255,0.375600004196167
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,2,255,0.3353280067443848
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,4,255,0.3249648094177246
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,8,255,0.3192176103591919
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,16,255,0.3170367956161499
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,32,255,0.31603360176086426
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,64,255,0.3148224115371704
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,512,1,1,511,0.23791520595550536
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,2,511,0.17359520196914674
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,4,511,0.15639519691467285
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,8,511,0.14628000259399415
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,16,511,0.14185919761657714
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,32,511,0.14058560132980347
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,64,511,0.1392143964767456
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,2,511,0.6318992137908935
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,512,1,1,511,0.6957600116729736
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,4,511,0.617193603515625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,8,511,0.6052944183349609
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,16,511,0.6015024185180664
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,32,511,0.5991775989532471
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,64,511,0.5975423812866211
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,512,1,1,1023,0.35231680870056153
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,2,1023,0.26943359375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,32,1023,0.2356623888015747
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,4,1023,0.2522032022476196
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,8,1023,0.24375839233398439
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,16,1023,0.23843839168548583
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,64,1023,0.23371360301971436
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,512,1,1,1023,1.2529359817504884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,2,1023,1.1769359588623047
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,8,1023,1.1483823776245117
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,4,1023,1.1568240165710448
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,16,1023,1.1426159858703613
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,32,1023,1.1408016204833984
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,512,1,1,2047,0.5836080074310303
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,64,1023,1.1387264251708984
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,2,2047,0.49044480323791506
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,4,2047,0.4490447998046875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,8,2047,0.43964319229125975
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,16,2047,0.43462238311767576
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,32,2047,0.43155360221862793
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,64,2047,0.4302879810333252
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,512,1,1,2047,2.3653919219970705
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,512,1,1,4095,1.041756820678711
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,2,2047,2.2620784759521486
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,4,2047,2.2437583923339846
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,8,2047,2.241604804992676
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,16,2047,2.222969627380371
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,32,2047,2.229561614990234
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,64,2047,2.2208816528320314
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,512,1,2,4095,0.9305760383605957
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,512,1,8,4095,0.8418160438537597
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,512,1,4,4095,0.9209551811218262
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,512,1,16,4095,0.8278592109680176
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,512,1,32,4095,0.8239808082580566
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,512,1,64,4095,0.8226592063903808
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1024,1,1,1,0.170360004901886
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,2,1,0.08926240205764771
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,4,1,0.07394400238990784
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,8,1,0.06611359715461732
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,16,1,0.0611840009689331
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,32,1,0.06068639755249024
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,64,1,0.06089280247688293
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1024,1,1,1,0.17350720167160033
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,2,1,0.10213279724121094
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,4,1,0.08630399703979492
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,8,1,0.07840960025787354
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,512,1,2,4095,4.42928466796875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,512,1,1,4095,4.60070571899414
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,16,1,0.07154719829559326
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,512,1,8,4095,4.385947036743164
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,512,1,4,4095,4.443983840942383
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,64,1,0.06984000205993653
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,32,1,0.07089920043945312
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,512,1,32,4095,4.379404830932617
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,512,1,16,4095,4.475431823730469
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,512,1,64,4095,4.4165088653564455
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1024,1,1,3,0.173089599609375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,4,3,0.07448800206184387
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,2,3,0.09628480076789855
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,64,3,0.06070079803466797
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,8,3,0.06596959829330444
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,16,3,0.0616703987121582
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,32,3,0.06189119815826416
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1024,1,1,3,0.1778480052947998
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,2,3,0.10747359991073609
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,4,3,0.09065279960632325
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,8,3,0.08237599730491638
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,16,3,0.07452319860458374
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,32,3,0.07312319874763488
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,64,3,0.0738976001739502
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1024,1,1,7,0.167247998714447
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,2,7,0.0931984007358551
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,4,7,0.07602880001068116
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,8,7,0.06674079895019532
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,16,7,0.06150559782981872
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,32,7,0.061326402425765994
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,64,7,0.06181600093841553
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1024,1,1,7,0.18888800144195556
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,2,7,0.11539360284805297
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,4,7,0.09738720059394837
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,8,7,0.0891983985900879
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,16,7,0.08334400057792664
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,32,7,0.08015999794006348
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,64,7,0.07925440073013305
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1024,1,1,15,0.1715456008911133
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,2,15,0.09686880111694336
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,4,15,0.07736160159111023
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,8,15,0.0679423987865448
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,16,15,0.061831998825073245
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,32,15,0.061350399255752565
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,4,15,0.1139024019241333
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,64,15,0.061972802877426146
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1024,1,1,15,0.2081376075744629
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,2,15,0.13384159803390502
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,8,15,0.10379199981689453
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,16,15,0.10049920082092285
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,32,15,0.0968783974647522
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,64,15,0.09466559886932373
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1024,1,1,31,0.175489604473114
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,2,31,0.09872320294380188
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,4,31,0.07880160212516785
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,8,31,0.07040640115737914
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,16,31,0.06536960005760192
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,32,31,0.06223040223121643
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,64,31,0.061894398927688596
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1024,1,1,31,0.2400592088699341
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,2,31,0.16548479795455934
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,4,31,0.144868803024292
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,8,31,0.13578879833221436
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,16,31,0.13113759756088256
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,8,63,0.07831839919090271
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,32,31,0.12874560356140136
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,64,31,0.1280959963798523
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,2,63,0.10869920253753662
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,4,63,0.08611840009689331
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1024,1,1,63,0.18151359558105468
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,16,63,0.07405440211296081
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,32,63,0.07276960015296936
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,64,63,0.07182719707489013
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1024,1,1,63,0.30036320686340334
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,2,63,0.22955520153045655
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,4,63,0.20871520042419434
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,8,63,0.19915839433670043
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,16,63,0.19835200309753417
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,32,63,0.1931887984275818
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,64,63,0.19295040369033814
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,32,127,0.0926367998123169
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1024,1,1,127,0.20467040538787842
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,2,127,0.1340224027633667
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,4,127,0.11066399812698365
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,8,127,0.10092159509658813
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,2,127,0.3683824062347412
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,16,127,0.0950160026550293
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,64,127,0.09257919788360595
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1024,1,1,127,0.437556791305542
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,4,127,0.3454528093338013
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,8,127,0.33319199085235596
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,16,127,0.3272511959075928
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,32,127,0.3249295949935913
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,64,127,0.3275599956512451
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1024,1,1,255,0.26237120628356936
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,2,255,0.1817039966583252
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,4,255,0.16002240180969238
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,8,255,0.14992640018463135
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,16,255,0.14489279985427855
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,32,255,0.14228320121765137
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,64,255,0.14092320203781128
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1024,1,1,255,0.7198512077331543
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,2,255,0.6390448093414307
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,4,255,0.6169968128204346
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,8,255,0.6064591884613038
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,16,255,0.6003920078277588
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,32,255,0.5976304054260254
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,64,255,0.5973264217376709
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1024,1,1,511,0.44337759017944334
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,2,511,0.31867520809173583
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,4,511,0.28198881149291993
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,8,511,0.2628607988357544
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,16,511,0.2551248073577881
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,32,511,0.2544111967086792
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,64,511,0.25142879486083985
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,2,511,1.227403163909912
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1024,1,1,511,1.3441408157348633
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,16,511,1.1578783988952637
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,8,511,1.1671008110046386
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,4,511,1.1845487594604491
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,32,511,1.158033561706543
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,64,511,1.1548336029052735
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,2,1023,0.5327536106109619
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,8,1023,0.4642799854278564
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1024,1,1,1023,0.6836095809936523
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,4,1023,0.4823440074920654
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,16,1023,0.45360479354858396
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,32,1023,0.4527103900909424
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,64,1023,0.4459807872772217
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,2,1023,2.303563117980957
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1024,1,1,1023,2.449430465698242
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,4,1023,2.2882879257202147
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,8,1023,2.263151931762695
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,16,1023,2.246865653991699
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,128,1024,1,1,2047,1.1420703887939454
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,32,1023,2.2391839981079102
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,64,1023,2.235251235961914
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,64,1024,1,2,2047,0.9716527938842774
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,32,1024,1,4,2047,0.9174464225769043
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,16,1024,1,8,2047,0.8581711769104003
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,8,1024,1,16,2047,0.8471247673034668
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,4,1024,1,32,2047,0.8410592079162598
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,float16,2,1024,1,64,2047,0.8954527854919434
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,64,1024,1,2,2047,4.4906463623046875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,128,1024,1,1,2047,4.7048286437988285
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,32,1024,1,4,2047,4.446160125732422
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,16,1024,1,8,2047,4.411073684692383
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,8,1024,1,16,2047,4.399897766113281
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,4,1024,1,32,2047,4.4063873291015625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,mla_generation,flash_attention,float16,fp8,2,1024,1,64,2047,4.414380645751953
