framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1,1,2,1,0.012593600153923034
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1,1,8,1,0.012033600360155106
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1,1,16,1,0.01207360029220581
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1,1,32,1,0.012123200297355651
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1,1,1,1,0.012785600125789642
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,1,1,64,1,0.01218079999089241
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1,1,2,1,0.01839679926633835
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1,1,1,1,0.018624000251293182
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1,1,4,1,0.018270400166511536
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1,1,16,1,0.01818400025367737
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1,1,8,1,0.018104000389575957
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1,1,1,3,0.012873600423336028
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1,1,32,1,0.01822720021009445
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1,1,2,3,0.012409599870443344
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1,1,4,3,0.012095999717712403
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1,1,4,1,0.012225600332021714
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1,1,16,3,0.012120000272989272
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1,1,8,3,0.012038400024175644
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1,1,32,3,0.012054400146007537
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,1,1,64,3,0.012057600170373916
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1,1,1,3,0.018911999464035035
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1,1,2,3,0.01842560023069382
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1,1,8,3,0.01793919950723648
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1,1,32,3,0.018163199722766876
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,1,1,64,3,0.018105599284172057
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1,1,1,7,0.012876799702644348
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1,1,8,7,0.012652799487113953
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1,1,16,7,0.012177599966526032
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1,1,32,7,0.01210239976644516
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,1,1,64,7,0.012124799937009812
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1,1,1,7,0.018796800076961516
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1,1,2,7,0.018454399704933167
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1,1,4,7,0.018187199532985688
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1,1,8,7,0.018016000092029572
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1,1,16,7,0.018222400546073915
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1,1,32,7,0.018270400166511536
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,1,1,64,7,0.018078400194644927
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1,1,1,15,0.012980799376964568
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1,1,2,15,0.012564800679683685
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1,1,4,15,0.012328000366687774
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1,1,8,15,0.0122079998254776
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1,1,16,15,0.012185599654912949
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1,1,32,15,0.012272000312805176
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,1,1,64,15,0.0121568001806736
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1,1,1,15,0.0190528005361557
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1,1,2,15,0.018513600528240203
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1,1,4,15,0.01823520064353943
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1,1,8,15,0.018134400248527527
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1,1,16,15,0.018195199966430663
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1,1,32,15,0.01826400011777878
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,1,1,64,15,0.018115200102329254
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1,1,1,31,0.012967999279499053
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1,1,2,31,0.01260959953069687
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1,1,4,31,0.012432000041007996
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1,1,16,31,0.012094400078058242
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1,1,8,31,0.012241599708795547
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1,1,32,31,0.01231039986014366
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,1,1,64,31,0.012132800370454788
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1,1,1,31,0.01891999989748001
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1,1,2,31,0.018518400192260743
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1,1,4,31,0.018190400302410127
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1,1,8,31,0.018087999522686006
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1,1,16,31,0.018131199479103088
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1,1,32,31,0.0181536003947258
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,1,1,64,31,0.01828639954328537
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1,1,1,63,0.012934400141239167
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1,1,2,63,0.01265760064125061
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1,1,4,63,0.012251199781894683
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1,1,8,63,0.012227199971675873
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1,1,16,63,0.012299200147390365
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1,1,32,63,0.012243200093507767
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,1,1,64,63,0.012307199835777282
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1,1,1,63,0.018926399946212768
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1,1,2,63,0.01855839937925339
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1,1,4,63,0.018345600366592406
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,1,1,64,1,0.018089599907398224
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1,1,8,63,0.018052799999713896
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1,1,16,63,0.018369600176811218
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1,1,32,63,0.01831679940223694
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,1,1,64,63,0.018300800025463103
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1,1,1,127,0.014731200039386749
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1,1,2,127,0.014398400485515595
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1,1,16,127,0.01404000073671341
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1,1,4,127,0.014248000085353851
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1,1,2,7,0.012417600303888322
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1,1,4,3,0.018216000497341157
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1,1,4,7,0.012257599830627441
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1,1,8,127,0.014046399295330048
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1,1,16,3,0.017960000038146972
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1,1,32,127,0.014073599874973298
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,1,1,64,127,0.013894400000572205
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1,1,1,127,0.020937600731849672
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1,1,2,127,0.019857600331306458
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1,1,4,127,0.020185600221157073
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1,1,8,127,0.019860799610614776
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1,1,16,127,0.020006400346755982
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1,1,32,127,0.019977599382400513
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,1,1,64,127,0.020080000162124634
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1,1,1,255,0.017849600315093993
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1,1,2,255,0.01746239960193634
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1,1,4,255,0.01720000058412552
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1,1,8,255,0.017084799706935883
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1,1,16,255,0.017177599668502807
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1,1,32,255,0.01717280000448227
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,1,1,64,255,0.017070400714874267
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1,1,1,255,0.023924799263477327
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1,1,2,255,0.023347200453281404
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1,1,4,255,0.023039999604225158
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1,1,8,255,0.02301120012998581
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1,1,16,255,0.022950400412082673
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1,1,32,255,0.02317280024290085
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,1,1,64,255,0.022996799647808076
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1,1,1,511,0.01879040002822876
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1,1,2,511,0.017697599530220032
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1,1,4,511,0.017028799653053282
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1,1,8,511,0.016513599455356597
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1,1,16,511,0.017696000635623932
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1,1,32,511,0.017764799296855927
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,1,1,64,511,0.017734399437904357
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1,1,1,511,0.025176000595092774
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1,1,2,511,0.023849600553512575
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1,1,4,511,0.023384000360965728
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1,1,8,511,0.02280319929122925
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1,1,16,511,0.02402079999446869
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1,1,32,511,0.024166400730609893
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,1,1,64,511,0.024145600199699403
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1,1,1,1023,0.0191103994846344
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1,1,2,1023,0.01807679980993271
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1,1,4,1023,0.017343999445438386
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1,1,8,1023,0.016777600347995757
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1,1,16,1023,0.01791200041770935
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1,1,32,1023,0.01799360066652298
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1,1,8,1023,0.02361599951982498
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,1,1,64,1023,0.018030400574207305
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1,1,1,1023,0.025705599784851076
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1,1,2,1023,0.02478879988193512
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1,1,4,1023,0.023950399458408357
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1,1,16,1023,0.024439999461174013
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1,1,32,1023,0.024639999866485594
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,1,1,64,1023,0.024536000192165376
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1,1,1,2047,0.019833600521087645
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1,1,4,2047,0.018113599717617036
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1,1,2,2047,0.018772800266742707
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1,1,8,2047,0.017795200645923614
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1,1,16,2047,0.01823199987411499
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1,1,32,2047,0.018116800487041472
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,1,1,64,2047,0.018177600204944612
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1,1,1,2047,0.02836320102214813
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1,1,2,2047,0.027599999308586122
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1,1,4,2047,0.02650400102138519
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1,1,8,2047,0.02593280076980591
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1,1,16,2047,0.026535999774932862
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1,1,32,2047,0.026841598749160766
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,1,1,64,2047,0.027004799246788024
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1,1,1,4095,0.021932800114154816
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1,1,2,4095,0.020662400126457214
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1,1,4,4095,0.019849599897861482
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1,1,8,4095,0.01914079934358597
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1,1,16,4095,0.02014559954404831
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1,1,32,4095,0.020310400426387785
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,1,1,64,4095,0.0204927995800972
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1,1,1,4095,0.03361760079860687
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1,1,2,4095,0.032400000095367434
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1,1,4,4095,0.03134559988975525
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1,1,8,4095,0.031046399474143983
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1,1,16,4095,0.03193120062351227
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1,1,32,4095,0.031990399956703185
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,1,1,64,4095,0.03188959956169128
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1,1,1,8191,0.024459199607372285
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1,1,2,8191,0.023537600040435792
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1,1,4,8191,0.021967999637126923
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1,1,8,8191,0.021996800601482392
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1,1,16,8191,0.02359360009431839
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1,1,32,8191,0.023950399458408357
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,1,1,64,8191,0.024235199391841888
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1,1,1,8191,0.042719998955726625
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1,1,2,8191,0.04050399959087372
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1,1,4,8191,0.039900800585746764
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1,1,8,8191,0.039392000436782836
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1,1,16,8191,0.041345599293708804
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1,1,32,8191,0.041500800848007204
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,1,1,64,8191,0.041659200191497804
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1,1,1,16383,0.029128000140190125
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1,1,2,16383,0.02697120010852814
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1,1,4,16383,0.02602880001068115
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1,1,8,16383,0.02526719868183136
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1,1,16,16383,0.025889599323272706
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1,1,32,16383,0.025415998697280884
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,1,1,64,16383,0.025863999128341676
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1,1,1,16383,0.06307520270347595
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1,1,2,16383,0.057392001152038574
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1,1,4,16383,0.055460798740386966
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1,1,8,16383,0.05578240156173706
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1,1,16,16383,0.05593119859695435
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1,1,32,16383,0.055668801069259644
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,1,1,64,16383,0.05556480288505554
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1,1,1,32767,0.037196800112724304
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1,1,2,32767,0.03205440044403076
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1,1,4,32767,0.031585600972175595
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1,1,8,32767,0.03033120036125183
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1,1,16,32767,0.030164799094200133
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1,1,32,32767,0.03034079968929291
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,1,1,64,32767,0.02935839891433716
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1,1,1,32767,0.09848160147666932
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1,1,2,32767,0.09562399983406067
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1,1,4,32767,0.09510239958763123
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1,1,8,32767,0.0941424012184143
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1,1,16,32767,0.09412159919738769
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1,1,32,32767,0.09417759776115417
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,1,1,64,32767,0.09389600157737732
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1,1,1,65535,0.048785600066185
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1,1,2,65535,0.045335999131202696
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1,1,4,65535,0.044099199771881106
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1,1,8,65535,0.04369759857654572
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1,1,16,65535,0.042985600233078
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1,1,32,65535,0.04270400106906891
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,1,1,64,65535,0.04323520064353943
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1,1,1,65535,0.16390080451965333
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1,1,2,65535,0.16220320463180543
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1,1,4,65535,0.16105120182037352
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1,1,8,65535,0.1606335997581482
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1,1,16,65535,0.1605631947517395
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1,1,32,65535,0.16017760038375856
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,1,1,64,65535,0.16060320138931275
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1,1,1,131071,0.0674127995967865
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1,1,2,131071,0.06303520202636718
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1,1,4,131071,0.06196640133857727
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1,1,8,131071,0.0620464026927948
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1,1,16,131071,0.060145598649978635
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1,1,32,131071,0.060521602630615234
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,1,1,64,131071,0.06084640026092529
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1,1,1,131071,0.2926431894302368
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1,1,2,131071,0.29085440635681153
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1,1,4,131071,0.2895792007446289
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1,1,8,131071,0.28813600540161133
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1,1,16,131071,0.28860158920288087
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1,1,32,131071,0.2893120050430298
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,2,1,2,1,0.01250080019235611
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,1,1,64,131071,0.2887615919113159
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,2,1,1,1,0.012860800325870513
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,2,1,4,1,0.012283200025558471
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,2,1,8,1,0.012176000326871873
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,2,1,16,1,0.011961600184440613
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,2,1,32,1,0.012163200229406358
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,2,1,64,1,0.012174399942159653
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,2,1,1,1,0.01854880005121231
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,2,1,2,1,0.01807039976119995
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,2,1,4,1,0.01809599995613098
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,2,1,8,1,0.017905600368976593
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,2,1,16,1,0.0177839994430542
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,2,1,32,1,0.017744000256061553
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,2,1,64,1,0.017958399653434754
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,2,1,8,3,0.012081599980592727
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,2,1,32,3,0.012191999703645706
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,2,1,2,3,0.012387199699878693
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,2,1,16,3,0.012198399752378464
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,2,1,1,3,0.013055999577045441
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,2,1,4,3,0.012247999757528305
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,2,1,64,3,0.012193600088357926
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,2,1,1,3,0.018702399730682374
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,2,1,2,3,0.017987200617790224
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,2,1,4,3,0.01797440052032471
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,2,1,8,3,0.017825600504875184
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,2,1,16,3,0.017657600343227386
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,2,1,16,7,0.012132800370454788
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,2,1,32,3,0.017892800271511078
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,2,1,64,3,0.0177839994430542
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,2,1,1,7,0.012779200077056884
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,2,1,2,7,0.012403199821710587
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,2,1,4,7,0.012223999947309494
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,2,1,8,7,0.01223519966006279
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,2,1,32,7,0.012167999893426895
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,2,1,64,7,0.012177599966526032
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,2,1,1,7,0.018569600582122803
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,2,1,2,7,0.018052799999713896
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,2,1,4,7,0.01775680035352707
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,2,1,8,7,0.017798399925231932
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,2,1,16,7,0.017841599881649017
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,2,1,32,7,0.01788160055875778
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,2,1,64,7,0.017852799594402315
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,2,1,1,15,0.01300320029258728
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,2,1,2,15,0.012574400007724761
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,2,1,4,15,0.0124208003282547
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,2,1,8,15,0.012321600317955017
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,2,1,16,15,0.012265600264072418
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,2,1,32,15,0.012352000176906585
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,2,1,64,15,0.012278400361537933
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,2,1,1,15,0.018532800674438476
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,2,1,2,15,0.018265600502490997
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,2,1,4,15,0.017899200320243835
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,2,1,8,15,0.01791359931230545
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,2,1,16,15,0.018156799674034118
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,2,1,32,15,0.0177824005484581
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,2,1,64,15,0.017795200645923614
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,2,1,1,31,0.012960000336170197
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,2,1,2,31,0.01255359947681427
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,2,1,4,31,0.012289600074291229
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,2,1,8,31,0.01226079985499382
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,2,1,16,31,0.012148799747228623
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,2,1,8,31,0.01786399930715561
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,2,1,32,31,0.012148799747228623
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,2,1,64,31,0.01223360002040863
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,2,1,1,31,0.01852640062570572
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,2,1,2,31,0.01820160001516342
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,2,1,4,31,0.01797119975090027
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,2,1,16,31,0.01804479956626892
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,2,1,32,31,0.018025599420070648
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,2,1,64,31,0.017958399653434754
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,2,1,1,63,0.013033600151538849
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,2,1,2,63,0.012494400143623352
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,2,1,4,63,0.012409599870443344
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,2,1,8,63,0.01223199963569641
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,2,1,16,63,0.012243200093507767
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,2,1,32,63,0.012353599816560746
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,2,1,64,63,0.01234079971909523
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,2,1,1,63,0.018691200017929076
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,2,1,2,63,0.01815200001001358
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,2,1,4,63,0.018143999576568603
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,2,1,8,63,0.018060800433158875
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,2,1,16,63,0.018145599961280824
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,2,1,32,63,0.01810240000486374
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,2,1,64,63,0.018012799322605133
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,2,1,1,127,0.014697599411010741
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,2,1,2,127,0.014244799315929414
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,2,1,4,127,0.014083200693130493
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,2,1,8,127,0.014051200449466705
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,2,1,16,127,0.014070400595664978
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,2,1,32,127,0.014168000221252442
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,2,1,64,127,0.014112000167369843
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,2,1,1,127,0.020334400236606598
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,2,1,2,127,0.019971199333667755
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,2,1,4,127,0.019790400564670563
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,2,1,8,127,0.019764800369739533
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,2,1,16,127,0.019782400131225585
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,2,1,32,127,0.019630399346351624
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,2,1,64,127,0.01974560022354126
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,2,1,1,255,0.01780640035867691
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,2,1,2,255,0.017343999445438386
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,2,1,4,255,0.0173007994890213
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,2,1,8,255,0.017185600101947786
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,2,1,16,255,0.01721920073032379
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,2,1,32,255,0.01714400053024292
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,2,1,64,255,0.017159999907016756
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,2,1,2,255,0.023289600014686586
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,2,1,1,255,0.02370080053806305
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,2,1,4,255,0.0230335995554924
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,2,1,8,255,0.02314720004796982
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,2,1,16,255,0.02300959974527359
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,2,1,32,255,0.022865599393844603
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,2,1,64,255,0.022976000607013703
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,2,1,1,511,0.018940800428390504
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,2,1,2,511,0.017931200563907623
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,2,1,4,511,0.017022399604320525
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,2,1,8,511,0.016795200109481812
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,2,1,16,511,0.017947199940681457
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,2,1,32,511,0.01799360066652298
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,2,1,64,511,0.018087999522686006
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,2,1,1,511,0.025812798738479616
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,2,1,2,511,0.024166400730609893
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,2,1,4,511,0.023694400489330292
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,2,1,8,511,0.023292799293994904
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,2,1,16,511,0.02414720058441162
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,2,1,32,511,0.024489599466323852
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,2,1,64,511,0.02436159998178482
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,2,1,1,1023,0.019512000679969787
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,2,1,2,1023,0.018241600692272188
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,2,1,4,1023,0.01743520051240921
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,2,1,8,1023,0.01714719980955124
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,2,1,16,1023,0.017825600504875184
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,2,1,32,1023,0.018216000497341157
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,2,1,64,1023,0.01810079962015152
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,2,1,1,1023,0.028355199098587035
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,2,1,2,1023,0.026655998826026917
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,2,1,4,1023,0.025843200087547303
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,2,1,8,1023,0.02537600100040436
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,2,1,16,1023,0.02632960081100464
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,2,1,32,1023,0.02635039985179901
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,2,1,64,1023,0.02690880000591278
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,2,1,1,2047,0.02072319984436035
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,2,1,2,2047,0.019150400161743165
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,2,1,4,2047,0.01857759952545166
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,2,1,8,2047,0.018219199776649476
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,2,1,16,2047,0.01825760006904602
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,2,1,32,2047,0.018503999710083006
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,2,1,64,2047,0.018512000143527985
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,2,1,1,2047,0.031974399089813234
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,2,1,2,2047,0.03070879876613617
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,2,1,4,2047,0.029731199145317078
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,2,1,8,2047,0.02948319911956787
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,2,1,16,2047,0.029702401161193846
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,2,1,32,2047,0.029924800992012023
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,2,1,64,2047,0.029844799637794496
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,2,1,1,4095,0.02218240052461624
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,2,1,2,4095,0.020942400395870208
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,2,1,4,4095,0.02017119973897934
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,2,1,8,4095,0.019631999731063842
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,2,1,16,4095,0.020465600490570068
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,2,1,32,4095,0.020505599677562714
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,2,1,64,4095,0.02057439982891083
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,2,1,1,4095,0.04040960073471069
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,2,1,2,4095,0.03824160099029541
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,2,1,4,4095,0.03734079897403717
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,2,1,8,4095,0.037064000964164734
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,2,1,16,4095,0.03752799928188324
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,2,1,32,4095,0.03803200125694275
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,2,1,64,4095,0.0380623996257782
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,2,1,1,8191,0.02630079984664917
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,2,1,2,8191,0.023350399732589722
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,2,1,4,8191,0.02294880002737045
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,2,1,8,8191,0.022230400145053862
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,2,1,16,8191,0.02497600018978119
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,2,1,32,8191,0.025731199979782106
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,2,1,64,8191,0.02563520073890686
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,2,1,1,8191,0.06050080060958862
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,2,1,2,8191,0.053799998760223386
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,2,1,4,8191,0.05222880244255066
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,2,1,8,8191,0.051708799600601194
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,2,1,16,8191,0.05499039888381958
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,2,1,32,8191,0.05511040091514587
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,2,1,64,8191,0.05548959970474243
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,2,1,1,16383,0.03397760093212128
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,2,1,2,16383,0.028356799483299257
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,2,1,4,16383,0.027641600370407103
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,2,1,8,16383,0.02662079930305481
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,2,1,16,16383,0.02885279953479767
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,2,1,32,16383,0.029366400837898255
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,2,1,64,16383,0.029043200612068176
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,2,1,1,16383,0.09451040029525756
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,2,1,2,16383,0.09179199934005737
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,2,1,1,32767,0.04512479901313782
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,2,1,4,16383,0.09117599725723266
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,2,1,8,16383,0.09121440052986145
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,2,1,16,16383,0.09246879816055298
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,2,1,32,16383,0.0933568000793457
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,2,1,2,32767,0.041448000073432925
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,2,1,64,16383,0.09331520199775696
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,2,1,4,32767,0.04012959897518158
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,2,1,8,32767,0.038996800780296326
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,2,1,16,32767,0.040601599216461184
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,2,1,8,32767,0.15613280534744262
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,2,1,32,32767,0.04115999937057495
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,2,1,64,32767,0.04082080125808716
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,2,1,1,32767,0.15981119871139526
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,2,1,2,32767,0.15730719566345214
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,2,1,4,32767,0.15666719675064086
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,2,1,16,32767,0.1581071972846985
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,2,1,32,32767,0.1582319974899292
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,2,1,16,65535,0.05812000036239624
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,2,1,64,32767,0.15836960077285767
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,2,1,1,65535,0.06377120018005371
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,2,1,2,65535,0.0593455970287323
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,2,1,4,65535,0.05759040117263794
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,2,1,8,65535,0.05639200210571289
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,2,1,32,65535,0.05872640013694763
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,2,1,64,65535,0.0591808021068573
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,2,1,1,65535,0.28966081142425537
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,2,1,2,65535,0.28589279651641847
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,2,1,4,65535,0.28543839454650877
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,2,1,8,65535,0.2851439952850342
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,2,1,16,65535,0.2871088027954102
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,2,1,32,65535,0.28669440746307373
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,2,1,64,65535,0.28780159950256345
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,2,1,1,131071,0.09968479871749877
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,2,1,2,131071,0.09616159796714782
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,2,1,4,131071,0.09405760169029236
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,2,1,8,131071,0.09238399863243103
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,2,1,16,131071,0.09409120082855224
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,2,1,32,131071,0.09442239999771118
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,2,1,64,131071,0.0944495975971222
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,2,1,1,131071,0.5413455963134766
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,2,1,2,131071,0.5392655849456787
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,2,1,4,131071,0.5377984046936035
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,2,1,8,131071,0.5365119934082031
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,4,1,1,1,0.013022400438785553
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,2,1,16,131071,0.5385503768920898
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,2,1,32,131071,0.5381216049194336
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,4,1,2,1,0.012577599287033081
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,4,1,64,1,0.012289600074291229
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,4,1,1,1,0.018929600715637207
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,2,1,64,131071,0.5398143768310547
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,4,1,4,1,0.01231200024485588
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,4,1,8,1,0.012323199957609176
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,4,1,16,1,0.012296000123023986
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,4,1,32,1,0.012144000083208085
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,4,1,2,1,0.018566399812698364
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,4,1,4,1,0.018188799917697906
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,4,1,8,1,0.018139199912548067
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,4,1,16,1,0.018094399571418764
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,4,1,32,1,0.01800000071525574
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,4,1,64,1,0.01809599995613098
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,4,1,1,3,0.012943999469280243
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,4,1,2,3,0.012611199915409089
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,4,1,4,3,0.012380799651145935
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,4,1,8,3,0.012161599844694138
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,4,1,16,3,0.012435200065374375
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,4,1,32,3,0.012275200337171555
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,4,1,64,3,0.012366399914026261
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,4,1,1,3,0.01900479942560196
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,4,1,2,3,0.018403199315071107
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,4,1,4,3,0.018143999576568603
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,4,1,8,3,0.01812479943037033
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,4,1,16,3,0.018105599284172057
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,4,1,32,3,0.018223999440670012
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,4,1,64,3,0.0179967999458313
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,4,1,1,7,0.013076800107955932
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,4,1,2,7,0.012654399871826172
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,4,1,4,7,0.0124208003282547
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,4,1,8,7,0.01236959993839264
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,4,1,16,7,0.0122079998254776
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,4,1,32,7,0.012228800356388092
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,4,1,64,7,0.012238399684429168
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,4,1,1,7,0.01892160028219223
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,4,1,2,7,0.018569600582122803
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,4,1,4,7,0.01820639967918396
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,4,1,8,7,0.018198400735855103
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,4,1,16,7,0.018187199532985688
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,4,1,32,7,0.018086400628089905
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,4,1,64,7,0.01809120029211044
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,4,1,1,15,0.013081599771976472
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,4,1,2,15,0.012656000256538392
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,4,1,1,15,0.018932799994945525
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,4,1,4,15,0.012411200255155564
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,4,1,8,15,0.012372799962759019
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,4,1,16,15,0.012272000312805176
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,4,1,32,15,0.012385600060224534
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,4,1,64,15,0.01228479966521263
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,4,1,2,15,0.01847359985113144
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,4,1,4,15,0.01841759979724884
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,4,1,8,15,0.018246400356292724
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,4,1,16,15,0.01823520064353943
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,4,1,32,15,0.018140800297260284
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,4,1,64,15,0.01839040070772171
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,4,1,1,31,0.013140800595283508
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,4,1,2,31,0.012721599638462066
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,4,1,4,31,0.012432000041007996
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,4,1,8,31,0.012406399846076966
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,4,1,16,31,0.012356799840927125
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,4,1,32,31,0.012319999933242797
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,4,1,64,31,0.012428800016641617
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,4,1,1,31,0.018984000384807586
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,4,1,4,31,0.018217599391937254
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,4,1,2,31,0.01855359971523285
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,4,1,8,31,0.018302400410175324
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,4,1,16,31,0.01820800006389618
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,4,1,32,31,0.018291200697422027
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,4,1,64,31,0.018116800487041472
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,4,1,1,63,0.013068799674510957
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,4,1,2,63,0.012703999876976013
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,4,1,4,63,0.012614400684833526
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,4,1,8,63,0.012366399914026261
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,4,1,16,63,0.012492799758911132
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,4,1,32,63,0.012342400103807449
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,4,1,64,63,0.012486399710178375
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,4,1,1,63,0.018972800672054292
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,4,1,2,63,0.018638400733470915
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,4,1,4,63,0.01855839937925339
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,4,1,8,63,0.01852319985628128
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,4,1,16,63,0.018214400112628936
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,4,1,32,63,0.01836320012807846
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,4,1,64,63,0.0181551992893219
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,4,1,1,127,0.01496960073709488
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,4,1,2,127,0.01454399973154068
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,4,1,4,127,0.014347200095653535
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,4,1,8,127,0.01422560065984726
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,4,1,16,127,0.01420000046491623
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,4,1,32,127,0.014115199446678162
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,4,1,64,127,0.014188799262046813
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,4,1,1,127,0.020908799767494202
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,4,1,2,127,0.020510399341583253
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,4,1,4,127,0.020364800095558168
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,4,1,8,127,0.02017440050840378
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,4,1,16,127,0.020134399831295013
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,4,1,32,127,0.020326399803161622
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,4,1,64,127,0.020227199792861937
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,4,1,1,255,0.018038399517536163
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,4,1,2,255,0.017481599748134614
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,4,1,1,255,0.02449920028448105
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,4,1,4,255,0.01732800006866455
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,4,1,8,255,0.017323200404644013
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,4,1,16,255,0.01729599982500076
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,4,1,32,255,0.01725600063800812
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,4,1,64,255,0.01728159934282303
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,4,1,2,255,0.02401279956102371
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,4,1,4,255,0.023705600202083586
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,4,1,8,255,0.02367520034313202
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,4,1,16,255,0.023609599471092223
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,4,1,32,255,0.0236735999584198
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,4,1,64,255,0.02364960014820099
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,4,1,1,511,0.019313600659370423
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,4,1,2,511,0.018063999712467194
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,4,1,4,511,0.017574399709701538
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,4,1,8,511,0.017052799463272095
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,4,1,16,511,0.018222400546073915
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,4,1,32,511,0.01855199933052063
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,4,1,64,511,0.01855040043592453
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,4,1,1,511,0.02820639908313751
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,4,1,2,511,0.027267199754714967
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,4,1,4,511,0.02603999972343445
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,4,1,8,511,0.02557600140571594
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,4,1,16,511,0.026793599128723145
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,4,1,32,511,0.02693600058555603
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,4,1,64,511,0.026824000477790832
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,4,1,1,1023,0.02006399929523468
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,4,1,2,1023,0.018430399894714355
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,4,1,4,1023,0.01772480010986328
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,4,1,8,1023,0.017459200322628023
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,4,1,16,1023,0.01807679980993271
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,4,1,32,1023,0.018382400274276733
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,4,1,64,1023,0.018428799510002137
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,4,1,1,1023,0.031324800848960874
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,4,1,2,1023,0.02991040050983429
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,4,1,4,1023,0.029345598816871644
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,4,1,8,1023,0.028867200016975403
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,4,1,32,1023,0.029640001058578492
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,4,1,16,1023,0.029547199606895447
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,4,1,64,1023,0.029766398668289184
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,4,1,1,2047,0.02160159945487976
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,4,1,2,2047,0.019976000487804412
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,4,1,4,2047,0.019020800292491914
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,4,1,8,2047,0.018454399704933167
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,4,1,16,2047,0.01899999976158142
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,4,1,32,2047,0.01934240013360977
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,4,1,64,2047,0.01915840059518814
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,4,1,1,2047,0.03910079896450043
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,4,1,2,2047,0.03734880089759827
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,4,1,4,2047,0.036643201112747194
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,4,1,8,2047,0.03637920022010803
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,4,1,16,2047,0.0362527996301651
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,4,1,32,2047,0.036392000317573545
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,4,1,64,2047,0.03659839928150177
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,4,1,1,4095,0.024300800263881685
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,4,1,2,4095,0.022091199457645417
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,4,1,4,4095,0.021107199788093566
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,4,1,8,4095,0.021187199652194975
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,4,1,16,4095,0.02221599966287613
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,4,1,32,4095,0.022652800381183624
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,4,1,64,4095,0.022620800137519836
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,4,1,1,4095,0.0584447979927063
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,4,1,2,4095,0.05309919714927673
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,4,1,4,4095,0.05079839825630188
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,4,1,8,4095,0.050888001918792725
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,4,1,16,4095,0.051551997661590576
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,4,1,32,4095,0.051836800575256345
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,4,1,64,4095,0.05331680178642273
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,4,1,1,8191,0.03206880092620849
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,4,1,2,8191,0.02731359899044037
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,4,1,4,8191,0.02632319927215576
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,4,1,8,8191,0.025176000595092774
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,4,1,16,8191,0.0287200003862381
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,4,1,32,8191,0.02969599962234497
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,4,1,64,8191,0.030033600330352784
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,4,1,2,8191,0.09028639793395996
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,4,1,1,8191,0.09308800101280212
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,4,1,4,8191,0.08969280123710632
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,4,1,8,8191,0.08969280123710632
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,4,1,16,8191,0.09256640076637268
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,4,1,32,8191,0.09372959733009338
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,4,1,64,8191,0.09369279742240906
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,4,1,1,16383,0.043751999735832214
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,4,1,2,16383,0.03956159949302673
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,4,1,4,16383,0.03792639970779419
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,4,1,8,16383,0.03779999911785126
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,4,1,16,16383,0.04071359932422638
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,4,1,32,16383,0.041571199893951416
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,4,1,64,16383,0.04161120057106018
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,4,1,1,16383,0.15890239477157592
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,4,1,2,16383,0.15575360059738158
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,4,1,4,16383,0.15491039752960206
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,4,1,8,16383,0.15448960065841674
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,4,1,16,16383,0.15766240358352662
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,4,1,32,16383,0.15854239463806152
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,4,1,64,16383,0.1584447979927063
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,4,1,1,32767,0.06198880076408386
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,4,1,2,32767,0.058727997541427615
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,4,1,4,32767,0.05611199736595154
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,4,1,8,32767,0.0555184006690979
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,4,1,16,32767,0.05829120278358459
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,4,1,32,32767,0.05894079804420471
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,4,1,64,32767,0.05870400071144104
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,4,1,1,32767,0.2878767967224121
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,4,1,2,32767,0.2848400115966797
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,4,1,8,32767,0.28360960483551023
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,4,1,4,32767,0.2838927984237671
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,4,1,16,32767,0.28664159774780273
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,4,1,32,32767,0.2871135950088501
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,4,1,64,32767,0.2873296022415161
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,4,1,1,65535,0.0986303985118866
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,4,1,2,65535,0.09383999705314636
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,4,1,4,65535,0.09105600118637085
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,4,1,8,65535,0.09077759981155395
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,4,1,16,65535,0.09436320066452027
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,4,1,32,65535,0.0942367970943451
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,4,1,64,65535,0.09459840059280396
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,4,1,1,65535,0.5395664215087891
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,4,1,2,65535,0.5368000030517578
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,4,1,4,65535,0.5349199771881104
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,4,1,8,65535,0.5347263813018799
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,4,1,16,65535,0.5392064094543457
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,4,1,32,65535,0.5396624088287354
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,4,1,1,131071,0.16806880235671998
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,4,1,64,65535,0.5379119873046875
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,4,1,2,131071,0.16142079830169678
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,4,1,4,131071,0.16091359853744508
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,4,1,8,131071,0.15880160331726073
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,4,1,16,131071,0.16377919912338257
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,4,1,32,131071,0.16251200437545776
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,4,1,64,131071,0.16300159692764282
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,4,1,1,131071,1.0398303985595703
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,4,1,2,131071,1.0385456085205078
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,4,1,4,131071,1.035961627960205
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,8,1,1,1,0.01305759996175766
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,8,1,2,1,0.01276639997959137
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,4,1,8,131071,1.0367600440979003
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,8,1,4,1,0.012535999715328216
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,4,1,16,131071,1.0390576362609862
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,8,1,8,1,0.012328000366687774
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,4,1,32,131071,1.0388671875
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,8,1,16,1,0.012433599680662155
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,4,1,64,131071,1.0391056060791015
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,8,1,32,1,0.012425599992275238
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,8,1,64,1,0.01234079971909523
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,8,1,1,1,0.019281600415706635
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,8,1,2,1,0.018875199556350707
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,8,1,4,1,0.018727999925613404
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,8,1,8,1,0.01850239932537079
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,8,1,16,1,0.018639999628067016
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,8,1,32,1,0.01825920045375824
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,8,1,64,1,0.018198400735855103
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,8,1,1,3,0.013091200590133667
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,8,1,2,3,0.012636800110340119
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,8,1,4,3,0.01252640038728714
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,8,1,8,3,0.01241919994354248
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,8,1,16,3,0.012398400157690049
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,8,1,32,3,0.012483199685811996
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,8,1,64,3,0.012256000190973282
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,8,1,1,3,0.019388799369335175
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,8,1,2,3,0.018863999843597413
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,8,1,4,3,0.018415999412536622
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,8,1,8,3,0.01855199933052063
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,8,1,16,3,0.018267199397087097
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,8,1,32,3,0.01852159947156906
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,8,1,64,3,0.01854719966650009
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,8,1,1,7,0.01308639943599701
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,8,1,2,7,0.012689599394798278
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,8,1,4,7,0.012436799705028534
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,8,1,8,7,0.012303999811410903
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,8,1,16,7,0.012494400143623352
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,8,1,32,7,0.012323199957609176
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,8,1,64,7,0.012430399656295776
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,8,1,1,7,0.019257600605487823
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,8,1,2,7,0.018777599930763243
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,8,1,4,7,0.018675200641155243
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,8,1,8,7,0.018196800351142885
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,8,1,16,7,0.01846559941768646
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,8,1,32,7,0.01857440024614334
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,8,1,64,7,0.018361599743366243
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,8,1,1,15,0.013273599743843078
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,8,1,2,15,0.012875199317932129
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,8,1,4,15,0.012567999958992004
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,8,1,8,15,0.01249760016798973
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,8,1,16,15,0.012380799651145935
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,8,1,32,15,0.01239520013332367
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,8,1,64,15,0.012459199875593185
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,8,1,1,15,0.019254399836063384
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,8,1,2,15,0.018904000520706177
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,8,1,4,15,0.018544000387191773
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,8,1,8,15,0.018500800430774688
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,8,1,16,15,0.018564799427986146
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,8,1,32,15,0.01858399957418442
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,8,1,64,15,0.01849599927663803
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,8,1,1,31,0.01329759955406189
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,8,1,2,31,0.01273760050535202
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,8,1,4,31,0.012580800056457519
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,8,1,8,31,0.012329600006341934
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,8,1,16,31,0.012558400630950928
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,8,1,32,31,0.012379200011491776
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,8,1,64,31,0.012454400211572647
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,8,1,1,31,0.01929599940776825
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,8,1,1,63,0.0133775994181633
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,8,1,2,31,0.018783999979496
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,8,1,4,31,0.018622399866580965
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,8,1,8,31,0.01860000044107437
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,8,1,16,31,0.01852799952030182
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,8,1,32,31,0.018387199938297273
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,8,1,64,31,0.018403199315071107
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,8,1,2,63,0.012824000418186187
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,8,1,4,63,0.012729600071907043
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,8,1,8,63,0.012649600207805634
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,8,1,16,63,0.012636800110340119
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,8,1,32,63,0.012359999865293504
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,8,1,64,63,0.0124719999730587
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,8,1,1,63,0.019307200610637665
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,8,1,2,63,0.01918880045413971
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,8,1,4,63,0.018935999274253844
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,8,1,8,63,0.01883520036935806
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,8,1,16,63,0.018681600689888
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,8,1,32,63,0.018775999546051025
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,8,1,64,63,0.018607999384403228
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,8,1,1,127,0.015115199983119965
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,8,1,2,127,0.014579200744628906
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,8,1,4,127,0.014494399726390838
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,8,1,8,127,0.014286400377750396
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,8,1,16,127,0.01441120058298111
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,8,1,32,127,0.014215999841690063
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,8,1,64,127,0.01430879980325699
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,8,1,1,127,0.02160000056028366
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,8,1,2,127,0.021108800172805788
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,8,1,4,127,0.021169599890708924
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,8,1,8,127,0.02083040028810501
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,8,1,16,127,0.020815999805927278
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,8,1,32,127,0.020956799387931824
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,8,1,64,127,0.021035200357437132
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,8,1,1,255,0.01798879951238632
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,8,1,2,255,0.017566399276256563
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,8,1,4,255,0.017451199889183044
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,8,1,8,255,0.01738400012254715
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,8,1,16,255,0.017339199781417847
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,8,1,32,255,0.017385600507259368
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,8,1,64,255,0.017448000609874725
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,8,1,1,255,0.026523199677467347
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,8,1,2,255,0.026107200980186464
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,8,1,4,255,0.02582240104675293
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,8,1,8,255,0.025862398743629455
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,8,1,16,255,0.025577598810195924
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,8,1,32,255,0.02569440007209778
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,8,1,64,255,0.02581599950790405
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,8,1,1,511,0.02022079974412918
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,8,1,2,511,0.018603199720382692
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,8,1,4,511,0.01788959950208664
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,8,1,8,511,0.017292800545692443
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,8,1,16,511,0.018531200289726258
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,8,1,32,511,0.018512000143527985
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,8,1,64,511,0.018545599281787874
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,8,1,1,511,0.031860798597335815
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,8,1,2,511,0.030241599678993224
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,8,1,4,511,0.02954559922218323
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,8,1,8,511,0.029016000032424927
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,8,1,16,511,0.030275198817253112
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,8,1,32,511,0.030052798986434936
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,8,1,64,511,0.030398398637771606
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,8,1,1,1023,0.021891200542449953
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,8,1,2,1023,0.01972319930791855
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,8,1,4,1023,0.01897120028734207
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,8,1,8,1023,0.018113599717617036
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,8,1,16,1023,0.0193792000412941
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,8,1,8,1023,0.035811200737953186
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,8,1,32,1023,0.019592000544071196
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,8,1,64,1023,0.01907840073108673
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,8,1,1,1023,0.03975839912891388
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,8,1,2,1023,0.037083199620246886
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,8,1,4,1023,0.036401599645614624
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,8,1,16,1023,0.0369376003742218
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,8,1,32,1023,0.03698399960994721
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,8,1,64,1023,0.0368943989276886
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,8,1,1,2047,0.024825599789619446
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,8,1,2,2047,0.021750399470329286
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,8,1,4,2047,0.020531199872493744
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,8,1,8,2047,0.020080000162124634
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,8,1,16,2047,0.021089600026607515
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,8,1,32,2047,0.020878399908542632
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,8,1,64,2047,0.020790399610996248
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,8,1,1,2047,0.058273601531982425
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,8,1,2,2047,0.05368000268936157
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,8,1,4,2047,0.0508512020111084
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,8,1,8,2047,0.05052800178527832
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,8,1,16,2047,0.05167999863624573
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,8,1,32,2047,0.051025599241256714
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,8,1,64,2047,0.050667202472686766
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,8,1,1,4095,0.03273760080337525
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,8,1,2,4095,0.027161601185798644
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,8,1,4,4095,0.025334399938583375
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,8,1,8,4095,0.024318400025367736
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,8,1,16,4095,0.025673601031303405
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,8,1,8,4095,0.08867200016975403
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,8,1,32,4095,0.02707839906215668
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,8,1,64,4095,0.0266400009393692
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,8,1,1,4095,0.09321439862251282
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,8,1,2,4095,0.09043200016021728
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,8,1,4,4095,0.0889631986618042
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,8,1,16,4095,0.09063199758529664
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,8,1,32,4095,0.0906495988368988
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,8,1,64,4095,0.09071040153503418
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,8,1,1,8191,0.043838399648666385
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,8,1,2,8191,0.039566400647163394
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,8,1,4,8191,0.03734880089759827
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,8,1,8,8191,0.0364544004201889
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,8,1,16,8191,0.04061599969863892
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,8,1,32,8191,0.04129279851913452
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,8,1,64,8191,0.04136959910392761
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,8,1,16,8191,0.15818560123443604
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,8,1,1,8191,0.15864160060882568
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,8,1,2,8191,0.15608639717102052
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,8,1,4,8191,0.15399199724197388
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,8,1,8,8191,0.15391680002212524
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,8,1,32,8191,0.15904639959335326
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,8,1,64,8191,0.15892159938812256
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,8,1,1,16383,0.06209920048713684
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,8,1,2,16383,0.05753120183944702
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,8,1,4,16383,0.05535039901733398
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,8,1,8,16383,0.05419679880142212
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,8,1,16,16383,0.058524799346923825
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,8,1,32,16383,0.05958080291748047
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,8,1,64,16383,0.05891519784927368
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,8,1,1,16383,0.28768000602722166
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,8,1,2,16383,0.2847984075546265
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,8,1,4,16383,0.2841023921966553
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,8,1,8,16383,0.2825952053070068
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,8,1,16,16383,0.28629279136657715
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,8,1,32,16383,0.28716800212860105
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,8,1,64,16383,0.28733439445495607
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,8,1,1,32767,0.09907199740409851
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,8,1,2,32767,0.09321920275688171
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,8,1,4,32767,0.09161760210990906
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,8,1,8,32767,0.0903984010219574
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,8,1,16,32767,0.09401919841766357
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,8,1,32,32767,0.09592000246047974
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,8,1,2,32767,0.5372079849243164
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,8,1,64,32767,0.09463199973106384
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,8,1,1,32767,0.5407551765441895
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,8,1,4,32767,0.5353727817535401
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,8,1,8,32767,0.5347583770751954
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,8,1,16,32767,0.5383791923522949
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,8,1,32,32767,0.5397280216217041
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,8,1,1,65535,0.16809279918670655
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,8,1,2,65535,0.16139039993286133
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,8,1,4,65535,0.15973119735717772
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,8,1,64,32767,0.5398208141326905
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,8,1,8,65535,0.15698399543762206
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,8,1,16,65535,0.16119680404663086
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,8,1,32,65535,0.16271519660949707
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,8,1,64,65535,0.16223679780960082
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,8,1,1,65535,1.0409711837768554
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,8,1,2,65535,1.0373536109924317
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,8,1,4,65535,1.0335968017578125
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,8,1,8,65535,1.0337568283081056
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,8,1,16,65535,1.0389663696289062
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,8,1,2,131071,0.2978912115097046
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,8,1,1,131071,0.30774240493774413
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,8,1,32,65535,1.0399215698242188
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,8,1,4,131071,0.29568960666656496
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,8,1,8,131071,0.29405760765075684
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,8,1,64,65535,1.0379039764404296
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,8,1,16,131071,0.2965967893600464
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,8,1,32,131071,0.2963119983673096
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,8,1,64,131071,0.29836320877075195
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,16,1,1,1,0.013436800241470337
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,16,1,2,1,0.013019199669361114
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,8,1,1,131071,2.0438255310058593
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,8,1,2,131071,2.0376272201538086
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,16,1,4,1,0.012856000661849975
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,8,1,4,131071,2.0379568099975587
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,16,1,16,1,0.012414400279521943
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,16,1,32,1,0.012889599800109864
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,16,1,8,1,0.012483199685811996
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,8,1,8,131071,2.033540725708008
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,8,1,16,131071,2.038689613342285
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,16,1,64,1,0.012503999471664428
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,8,1,32,131071,2.039401626586914
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,16,1,16,1,0.018644799292087556
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,8,1,64,131071,2.040507125854492
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,16,1,1,1,0.019176000356674196
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,16,1,2,1,0.018719999492168425
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,16,1,4,1,0.018617600202560425
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,16,1,8,1,0.018467199802398682
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,16,1,32,1,0.018518400192260743
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,16,1,1,3,0.013979199528694152
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,16,1,64,1,0.01836320012807846
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,16,1,2,3,0.012859199941158295
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,16,1,4,3,0.012521600723266602
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,16,1,8,3,0.012756800651550293
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,16,1,16,3,0.01257600039243698
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,16,1,32,3,0.012513600289821625
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,16,1,64,3,0.012460800260305405
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,16,1,1,3,0.01900160014629364
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,16,1,2,3,0.018592000007629395
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,16,1,4,3,0.018755200505256652
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,16,1,8,3,0.01823360025882721
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,16,1,16,3,0.018353599309921264
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,16,1,32,3,0.018430399894714355
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,16,1,64,3,0.01833759993314743
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,16,1,1,7,0.013916799426078796
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,16,1,2,7,0.013187199831008911
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,16,1,4,7,0.012656000256538392
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,16,1,8,7,0.012652799487113953
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,16,1,16,7,0.012478400021791458
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,16,1,32,7,0.012807999551296235
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,16,1,64,7,0.012617599964141846
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,16,1,1,7,0.019755199551582336
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,16,1,2,7,0.018743999302387238
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,16,1,4,7,0.018750399351119995
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,16,1,8,7,0.018620799481868743
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,16,1,16,7,0.018452799320220946
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,16,1,32,7,0.01834080070257187
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,16,1,64,7,0.018596799671649934
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,16,1,1,15,0.013526399433612824
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,16,1,2,15,0.013283200562000275
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,16,1,4,15,0.012644800543785095
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,16,1,8,15,0.012534399330615998
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,16,1,16,15,0.012583999335765839
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,16,1,32,15,0.012624000012874604
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,16,1,64,15,0.012638400495052337
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,16,1,1,15,0.019230400025844575
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,16,1,2,15,0.01900479942560196
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,16,1,4,15,0.01866080015897751
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,16,1,2,31,0.013145600259304047
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,16,1,8,15,0.01833920031785965
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,16,1,16,15,0.018534399569034576
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,16,1,32,15,0.018595199286937713
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,16,1,64,15,0.01839679926633835
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,16,1,1,31,0.013369600474834441
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,16,1,4,31,0.012783999741077422
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,16,1,8,31,0.012729600071907043
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,16,1,16,31,0.01260959953069687
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,16,1,32,31,0.012803199887275695
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,16,1,64,31,0.012577599287033081
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,16,1,1,31,0.020153599977493285
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,16,1,2,31,0.018991999328136444
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,16,1,4,31,0.018726399540901183
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,16,1,8,31,0.018566399812698364
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,16,1,16,31,0.018668800592422485
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,16,1,32,31,0.01854880005121231
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,16,1,64,31,0.018713599443435668
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,16,1,1,63,0.013488000631332398
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,16,1,2,63,0.013420799374580383
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,16,1,4,63,0.012937599420547485
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,16,1,8,63,0.012729600071907043
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,16,1,16,63,0.012703999876976013
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,16,1,32,63,0.012771199643611907
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,16,1,64,63,0.01279039978981018
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,16,1,1,63,0.020227199792861937
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,16,1,2,63,0.019801600277423857
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,16,1,4,63,0.019406400620937347
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,16,1,8,63,0.019648000597953796
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,16,1,16,63,0.019172799587249757
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,16,1,32,63,0.019681599736213685
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,16,1,64,63,0.019072000682353974
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,16,1,1,127,0.015436799824237823
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,16,1,2,127,0.015070399641990662
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,16,1,4,127,0.014521600306034088
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,16,1,8,127,0.014593599736690522
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,16,1,16,127,0.014348800480365752
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,16,1,32,127,0.01433439999818802
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,16,1,64,127,0.014484800398349762
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,16,1,1,127,0.02375040054321289
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,16,1,2,127,0.023145599663257597
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,16,1,4,127,0.02273920029401779
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,16,1,8,127,0.022878399491310118
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,16,1,16,127,0.02268960028886795
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,16,1,32,127,0.02266719937324524
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,16,1,64,127,0.02276960015296936
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,16,1,1,255,0.018646399676799773
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,16,1,2,255,0.018041600286960603
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,16,1,4,255,0.017795200645923614
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,16,1,8,255,0.01783040016889572
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,16,1,16,255,0.017318400740623473
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,16,1,32,255,0.017735999822616578
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,16,1,64,255,0.017416000366210938
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,16,1,1,255,0.029462400078773498
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,16,1,2,255,0.029100799560546876
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,16,1,4,255,0.028751999139785767
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,16,1,8,255,0.028622400760650635
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,16,1,16,255,0.02871200144290924
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,16,1,32,255,0.02885119915008545
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,16,1,64,255,0.028799998760223388
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,16,1,1,511,0.022252799570560457
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,16,1,2,511,0.019761599600315094
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,16,1,4,511,0.01842080056667328
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,16,1,8,511,0.01815840005874634
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,16,1,16,511,0.019326399266719817
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,16,1,32,511,0.019739200174808503
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,16,1,64,511,0.01918720006942749
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,16,1,1,511,0.03957119882106781
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,16,1,2,511,0.03694080114364624
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,16,1,4,511,0.035743999481201175
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,16,1,8,511,0.03558720052242279
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,16,1,16,511,0.036520001292228696
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,16,1,32,511,0.03702079951763153
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,16,1,64,511,0.036795198917388916
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,16,1,1,1023,0.024774399399757386
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,16,1,2,1023,0.021491199731826782
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,16,1,4,1023,0.020552000403404234
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,16,1,8,1023,0.01974399983882904
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,16,1,16,1023,0.021206399798393248
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,16,1,32,1023,0.021646399796009064
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,16,1,64,1023,0.021353599429130555
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,16,1,32,1023,0.05128160119056702
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,16,1,1,1023,0.05960800051689148
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,16,1,2,1023,0.054420799016952515
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,16,1,4,1023,0.0517408013343811
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,16,1,8,1023,0.05042240023612976
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,16,1,16,1023,0.050046402215957644
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,16,1,64,1023,0.05014240145683289
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,16,1,1,2047,0.03344320058822632
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,16,1,2,2047,0.026897600293159483
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,16,1,4,2047,0.024876800179481507
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,16,1,8,2047,0.023992000520229338
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,16,1,16,2047,0.024507200717926024
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,16,1,8,2047,0.0879472017288208
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,16,1,32,2047,0.02439039945602417
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,16,1,64,2047,0.024641600251197816
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,16,1,1,2047,0.09387360215187072
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,16,1,2,2047,0.08974239826202393
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,16,1,4,2047,0.08866720199584961
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,16,1,16,2047,0.08853440284729004
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,16,1,32,2047,0.0889136016368866
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,16,1,64,2047,0.08843200206756592
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,16,1,1,4095,0.04487999975681305
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,16,1,2,4095,0.03930239975452423
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,16,1,4,4095,0.037478399276733396
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,16,1,1,4095,0.15859839916229249
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,16,1,8,4095,0.03568640053272247
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,16,1,16,4095,0.03796319961547852
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,16,1,32,4095,0.0381520003080368
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,16,1,64,4095,0.03852320015430451
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,16,1,2,4095,0.15437439680099488
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,16,1,4,4095,0.15314879417419433
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,16,1,8,4095,0.1525007963180542
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,16,1,16,4095,0.15457760095596312
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,16,1,32,4095,0.15488159656524658
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,16,1,64,4095,0.1552880048751831
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,16,1,1,8191,0.06339840292930603
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,16,1,2,8191,0.05725439786911011
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,16,1,4,8191,0.05532960295677185
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,16,1,8,8191,0.053548800945281985
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,16,1,16,8191,0.05835679769515991
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,16,1,32,8191,0.05912479758262634
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,16,1,64,8191,0.059248000383377075
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,16,1,1,8191,0.28820960521697997
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,16,1,2,8191,0.2838351964950562
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,16,1,4,8191,0.2827600002288818
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,16,1,8,8191,0.28209600448608396
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,16,1,16,8191,0.28627839088439944
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,16,1,32,8191,0.287608003616333
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,16,1,1,16383,0.09855999946594238
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,16,1,64,8191,0.2872688055038452
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,16,1,2,16383,0.09287999868392945
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,16,1,4,16383,0.0910975992679596
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,16,1,8,16383,0.08901280164718628
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,16,1,16,16383,0.0934112012386322
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,16,1,32,16383,0.09412320256233216
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,16,1,64,16383,0.0952351987361908
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,16,1,1,16383,0.5402112007141113
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,16,1,2,16383,0.5367136001586914
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,16,1,4,16383,0.5351679801940918
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,16,1,8,16383,0.5339951992034913
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,16,1,16,16383,0.5391808032989502
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,16,1,32,16383,0.5405007839202881
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,16,1,1,32767,0.1685263991355896
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,16,1,64,16383,0.5411407947540283
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,16,1,2,32767,0.16077760457992554
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,16,1,4,32767,0.15772639513015746
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,16,1,8,32767,0.1559183955192566
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,16,1,16,32767,0.16202080249786377
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,16,1,32,32767,0.16295839548110963
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,16,1,64,32767,0.1628543972969055
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,16,1,1,32767,1.0400591850280763
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,16,1,4,32767,1.0348735809326173
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,16,1,2,32767,1.0375056266784668
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,16,1,8,32767,1.034121608734131
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,16,1,16,32767,1.0393343925476075
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,16,1,2,65535,0.2960671901702881
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,16,1,1,65535,0.3026832103729248
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,16,1,32,32767,1.0400992393493653
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,16,1,4,65535,0.2928447961807251
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,16,1,64,65535,0.2965296030044556
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,16,1,64,32767,1.0392592430114747
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,16,1,8,65535,0.29131839275360105
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,16,1,16,65535,0.29543519020080566
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,16,1,32,65535,0.29386560916900634
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,16,1,1,65535,2.0387487411499023
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,16,1,2,65535,2.035971260070801
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,16,1,1,131071,0.5698480129241943
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,16,1,4,65535,2.0318607330322265
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,16,1,8,65535,2.032504081726074
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,16,1,16,65535,2.0371360778808594
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,16,1,32,65535,2.038857650756836
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,16,1,64,65535,2.0365936279296877
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,16,1,4,131071,0.5651823997497558
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,16,1,2,131071,0.5698703765869141
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,16,1,8,131071,0.5635839939117432
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,16,1,16,131071,0.5677055835723877
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,16,1,32,131071,0.5642447948455811
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,16,1,64,131071,0.5660672187805176
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,32,1,1,1,0.015116800367832185
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,32,1,2,1,0.013529600203037262
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,32,1,4,1,0.013433599472045898
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,32,1,8,1,0.013491199910640716
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,32,1,16,1,0.013195200264453888
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,32,1,32,1,0.013470399379730224
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,16,1,1,131071,4.039628982543945
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,16,1,2,131071,4.035208129882813
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,32,1,64,1,0.013232000172138214
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,16,1,4,131071,4.051347351074218
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,32,1,2,1,0.019380800426006317
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,16,1,8,131071,4.032123184204101
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,32,1,8,1,0.01918399930000305
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,32,1,1,1,0.020681600272655486
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,32,1,4,1,0.019593599438667297
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,16,1,16,131071,4.035337448120117
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,16,1,32,131071,4.035876846313476
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,32,1,16,1,0.01912959963083267
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,16,1,64,131071,4.036808013916016
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,32,1,32,1,0.01929119974374771
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,32,1,64,1,0.019251200556755065
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,32,1,1,3,0.01472959965467453
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,32,1,2,3,0.013643200695514678
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,32,1,4,3,0.013484799861907959
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,32,1,8,3,0.013185599446296692
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,32,1,16,3,0.013158400356769562
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,32,1,32,3,0.013096000254154205
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,32,1,64,3,0.013415999710559845
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,32,1,1,3,0.020559999346733093
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,32,1,2,3,0.019436800479888917
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,32,1,8,3,0.018984000384807586
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,32,1,4,3,0.01945600062608719
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,32,1,2,7,0.013673600554466248
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,32,1,16,3,0.019032000005245207
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,32,1,32,3,0.018939200043678283
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,32,1,64,3,0.018961599469184874
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,32,1,1,7,0.015015999972820281
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,32,1,4,7,0.013700799643993377
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,32,1,8,7,0.013182400166988373
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,32,1,16,7,0.013310399651527405
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,32,1,32,7,0.013321599364280701
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,32,1,64,7,0.013075199723243714
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,32,1,1,7,0.02056639939546585
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,32,1,2,7,0.019631999731063842
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,32,1,4,7,0.01960960030555725
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,32,1,8,7,0.019392000138759614
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,32,1,2,15,0.013707199692726135
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,32,1,16,7,0.019475199282169342
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,32,1,64,7,0.019044800102710722
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,32,1,64,15,0.013436800241470337
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,32,1,32,7,0.01910399943590164
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,32,1,1,15,0.014803199470043183
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,32,1,4,15,0.013478399813175201
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,32,1,8,15,0.01345600038766861
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,32,1,16,15,0.013495999574661254
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,32,1,32,15,0.013551999628543854
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,32,1,1,15,0.021059200167655945
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,32,1,2,15,0.019731199741363524
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,32,1,4,15,0.019793599843978882
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,32,1,8,15,0.019467200338840484
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,32,1,16,15,0.01940480023622513
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,32,1,32,15,0.019683200120925903
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,32,1,64,15,0.019324800372123717
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,32,1,1,31,0.015398399531841278
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,32,1,2,31,0.01403840035200119
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,32,1,4,31,0.013654400408267976
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,32,1,8,31,0.013415999710559845
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,32,1,16,31,0.013504000008106231
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,32,1,32,31,0.01345279961824417
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,32,1,64,31,0.013713599741458892
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,32,1,1,31,0.021137599647045136
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,32,1,2,31,0.02052319943904877
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,32,1,4,31,0.019993600249290467
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,32,1,8,31,0.020187200605869295
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,32,1,16,31,0.0198512002825737
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,32,1,32,31,0.02008640021085739
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,32,1,64,31,0.01998399943113327
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,32,1,1,63,0.015830400586128234
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,32,1,2,63,0.014243200421333313
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,32,1,4,63,0.014155200123786927
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,32,1,8,63,0.014017599821090698
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,32,1,16,63,0.014139199256896972
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,32,1,32,63,0.013828800618648529
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,32,1,64,63,0.013876800239086152
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,32,1,1,63,0.023472000658512116
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,32,1,2,63,0.022427199780941008
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,32,1,4,63,0.022230400145053862
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,32,1,8,63,0.022123199701309205
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,32,1,16,63,0.02218399941921234
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,32,1,32,63,0.022140799462795256
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,32,1,64,63,0.02189760059118271
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,32,1,1,127,0.016790400445461272
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,32,1,2,127,0.01602399945259094
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,32,1,4,127,0.015886400640010834
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,32,1,8,127,0.01571680009365082
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,32,1,16,127,0.015936000645160674
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,32,1,32,127,0.015452800691127777
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,32,1,64,127,0.01544319987297058
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,32,1,1,127,0.02792159914970398
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,32,1,2,127,0.026926401257514953
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,32,1,4,127,0.026740801334381104
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,32,1,8,127,0.026763200759887695
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,32,1,16,127,0.026704001426696777
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,32,1,32,127,0.026681599020957947
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,32,1,64,127,0.026475200057029726
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,32,1,1,255,0.0200095996260643
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,32,1,2,255,0.01897920072078705
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,32,1,4,255,0.01858240067958832
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,32,1,8,255,0.01874080002307892
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,32,1,16,255,0.018939200043678283
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,32,1,32,255,0.018804800510406495
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,32,1,64,255,0.018753600120544434
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,32,1,1,255,0.036796799302101134
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,32,1,2,255,0.035708799958229065
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,32,1,4,255,0.03561280071735382
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,32,1,8,255,0.035390400886535646
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,32,1,16,255,0.035492798686027525
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,32,1,32,255,0.03556160032749176
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,32,1,64,255,0.03562400043010712
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,32,1,1,511,0.028228801488876343
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,32,1,2,511,0.022705599665641785
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,32,1,4,511,0.02133760005235672
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,32,1,8,511,0.020659199357032774
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,32,1,16,511,0.02130720019340515
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,32,1,32,511,0.021782399713993074
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,32,1,64,511,0.021619200706481934
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,32,1,1,511,0.06266400218009949
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,32,1,2,511,0.05561760067939758
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,32,1,4,511,0.051393598318099976
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,32,1,8,511,0.04987039864063263
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,32,1,16,511,0.050843197107315066
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,32,1,32,511,0.05135040283203125
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,32,1,64,511,0.05110880136489868
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,32,1,1,1023,0.03611679971218109
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,32,1,2,1023,0.02771199941635132
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,32,1,4,1023,0.02427999973297119
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,32,1,8,1023,0.022912000119686127
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,32,1,16,1023,0.023809599876403808
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,32,1,32,1023,0.0239439994096756
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,32,1,64,1023,0.02364480048418045
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,32,1,16,1023,0.08756480216979981
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,32,1,1,1023,0.0961679995059967
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,32,1,2,1023,0.09016799926757812
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,32,1,4,1023,0.08806080222129822
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,32,1,8,1023,0.08674399852752686
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,32,1,32,1023,0.0883903980255127
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,32,1,64,1023,0.08806719779968261
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,32,1,1,2047,0.047188800573348996
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,32,1,2,2047,0.04031839966773987
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,32,1,4,2047,0.03773280084133148
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,32,1,8,2047,0.036687999963760376
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,32,1,16,2047,0.03728159964084625
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,32,1,32,2047,0.03755840063095093
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,32,1,8,2047,0.15319039821624755
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,32,1,64,2047,0.037478399276733396
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,32,1,1,2047,0.16115200519561768
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,32,1,2,2047,0.15540319681167603
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,32,1,4,2047,0.15407520532608032
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,32,1,16,2047,0.15408480167388916
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,32,1,32,2047,0.15434080362319946
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,32,1,64,2047,0.15423359870910644
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,32,1,1,4095,0.0659168004989624
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,32,1,2,4095,0.05849760174751282
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,32,1,4,4095,0.05610719919204712
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,32,1,8,4095,0.05420799851417542
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,32,1,16,4095,0.05679519772529602
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,32,1,32,4095,0.05644000172615051
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,32,1,64,4095,0.05638239979743957
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,32,1,1,4095,0.2907696008682251
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,32,1,2,4095,0.28517119884490966
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,32,1,4,4095,0.2840591907501221
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,32,1,8,4095,0.2820255994796753
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,32,1,16,4095,0.2847520112991333
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,32,1,32,4095,0.28505759239196776
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,32,1,64,4095,0.2844048023223877
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,32,1,1,8191,0.10103679895401001
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,32,1,2,8191,0.09394559860229493
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,32,1,4,8191,0.09043359756469727
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,32,1,8,8191,0.09005600214004517
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,32,1,16,8191,0.09440960288047791
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,32,1,32,8191,0.09573439955711364
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,32,1,64,8191,0.0957472026348114
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,32,1,1,8191,0.5424992084503174
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,32,1,2,8191,0.5377168178558349
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,32,1,4,8191,0.5355519771575927
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,32,1,8,8191,0.5345680236816406
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,32,1,16,8191,0.5406511783599853
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,32,1,32,8191,0.5406208038330078
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,32,1,64,8191,0.5404767990112305
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,32,1,1,16383,0.16989599466323851
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,32,1,2,16383,0.16305439472198485
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,32,1,4,16383,0.15914080142974854
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,32,1,8,16383,0.15638400316238404
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,32,1,16,16383,0.16236799955368042
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,32,1,32,16383,0.16360000371932984
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,32,1,64,16383,0.1626927971839905
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,32,1,1,16383,1.0429823875427247
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,32,1,2,16383,1.036892795562744
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,32,1,4,16383,1.035806369781494
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,32,1,8,16383,1.0335007667541505
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,32,1,32,16383,1.0395376205444335
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,32,1,16,16383,1.0406160354614258
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,32,1,1,32767,0.30686240196228026
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,32,1,2,32767,0.29770400524139407
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,32,1,64,16383,1.040390396118164
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,32,1,4,32767,0.2931152105331421
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,32,1,8,32767,0.29111039638519287
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,32,1,16,32767,0.2970272064208984
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,32,1,32,32767,0.29785919189453125
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,32,1,64,32767,0.29597280025482176
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,32,1,1,32767,2.0427135467529296
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,32,1,2,32767,2.038190460205078
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,32,1,1,65535,0.5748960018157959
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,32,1,4,32767,2.0336431503295898
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,32,1,8,32767,2.0316207885742186
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,32,1,16,32767,2.0368064880371093
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,32,1,32,32767,2.037015914916992
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,32,1,64,32767,2.0382064819335937
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,32,1,2,65535,0.5793295860290527
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,32,1,4,65535,0.5682079792022705
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,32,1,8,65535,0.5547935962677002
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,32,1,16,65535,0.5652927875518798
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,32,1,32,65535,0.5681871891021728
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,32,1,64,65535,0.5694575786590577
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,32,1,1,65535,4.040787124633789
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,32,1,2,65535,4.038969421386719
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,64,1,1,1,0.015638400614261628
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,64,1,2,1,0.014752000570297241
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,32,1,4,65535,4.037190246582031
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,64,1,4,1,0.014441600441932679
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,32,1,8,65535,4.031777572631836
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,64,1,8,1,0.01395999938249588
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,64,1,16,1,0.014529600739479065
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,64,1,2,1,0.02062080055475235
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,32,1,16,65535,4.038747024536133
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,64,1,32,1,0.014511999487876893
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,64,1,64,1,0.013892799615859985
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,32,1,32,65535,4.038796615600586
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,64,1,1,1,0.02144159972667694
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,32,1,64,65535,4.04061279296875
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,64,1,4,1,0.020561599731445314
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,64,1,8,1,0.020001600682735442
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,64,1,16,1,0.0203232005238533
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,64,1,32,1,0.02040639966726303
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,64,1,64,1,0.020233599841594695
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,64,1,1,3,0.01541599929332733
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,64,1,2,3,0.014476799964904785
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,64,1,4,3,0.01435679942369461
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,64,1,8,3,0.01467359960079193
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,64,1,16,3,0.013998399674892425
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,64,1,32,3,0.014470399916172027
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,64,1,64,3,0.01408800035715103
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,64,1,1,3,0.02191839963197708
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,64,1,2,3,0.021003200113773345
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,64,1,4,3,0.020691199600696562
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,64,1,8,3,0.02051679939031601
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,64,1,16,3,0.0203247994184494
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,64,1,32,3,0.019947199523448943
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,64,1,64,3,0.02030239999294281
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,64,1,1,7,0.015462400019168853
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,64,1,2,7,0.014761599898338317
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,64,1,4,7,0.014571200311183929
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,64,1,8,7,0.014239999651908874
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,64,1,16,7,0.014057600498199463
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,64,1,32,7,0.01440960019826889
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,64,1,64,7,0.014403200149536133
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,64,1,1,7,0.022147199511528014
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,64,1,2,7,0.020768000185489653
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,64,1,4,7,0.020371200144290925
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,64,1,8,7,0.02045599967241287
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,64,1,16,7,0.020824000239372253
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,64,1,32,7,0.020316800475120543
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,64,1,64,7,0.02072640061378479
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,64,1,1,15,0.015500800311565399
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,64,1,2,15,0.014865599572658539
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,64,1,4,15,0.014699199795722961
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,64,1,8,15,0.014315199851989747
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,64,1,16,15,0.0141744002699852
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,64,1,32,15,0.014617599546909332
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,64,1,64,15,0.014150400459766389
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,64,1,1,15,0.022252799570560457
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,64,1,2,15,0.020777599513530733
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,64,1,4,15,0.02120320051908493
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,64,1,8,15,0.021057599782943727
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,64,1,16,15,0.02067359983921051
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,64,1,32,15,0.021184000372886657
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,64,1,64,15,0.021006399393081666
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,64,1,1,31,0.0158160001039505
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,64,1,2,31,0.014422400295734406
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,64,1,4,31,0.014263999462127686
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,64,1,8,31,0.014558400213718414
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,64,1,16,31,0.014299200475215912
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,64,1,32,31,0.014287999272346497
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,64,1,64,31,0.014371199905872345
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,64,1,1,31,0.024160000681877136
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,64,1,2,31,0.023102399706840516
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,64,1,4,31,0.022961600124835967
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,64,1,8,31,0.022777600586414336
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,64,1,16,31,0.02267040014266968
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,64,1,32,31,0.022787199914455415
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,64,1,64,31,0.022628800570964815
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,64,1,1,63,0.016459199786186218
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,64,1,2,63,0.015408000349998474
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,64,1,4,63,0.015147200226783753
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,64,1,4,63,0.026238399744033813
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,64,1,8,63,0.01451359987258911
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,64,1,16,63,0.015163199603557586
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,64,1,32,63,0.014740799367427827
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,64,1,64,63,0.014945599436759948
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,64,1,1,63,0.027342399954795836
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,64,1,2,63,0.026345598697662353
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,64,1,8,63,0.02592160105705261
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,64,1,16,63,0.02603360116481781
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,64,1,32,63,0.02598080039024353
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,64,1,64,63,0.02611680030822754
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,64,1,1,127,0.01812160015106201
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,64,1,2,127,0.01674560010433197
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,64,1,4,127,0.01693280041217804
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,64,1,8,127,0.016468800604343414
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,64,1,16,127,0.016383999586105348
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,64,1,32,127,0.01701440066099167
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,64,1,64,127,0.01658719927072525
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,64,1,1,127,0.034815999865531924
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,64,1,2,127,0.033939200639724734
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,64,1,4,127,0.03352800011634827
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,64,1,8,127,0.03362559974193573
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,64,1,16,127,0.03343679904937744
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,64,1,32,127,0.03363839983940124
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,64,1,64,127,0.03333120048046112
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,64,1,1,255,0.021848000586032867
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,64,1,2,255,0.020124800503253937
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,64,1,4,255,0.019920000433921815
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,64,1,16,255,0.019655999541282655
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,64,1,8,255,0.019679999351501463
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,64,1,32,255,0.020156799256801604
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,64,1,64,255,0.019892799854278564
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,64,1,1,255,0.05138239860534668
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,64,1,2,255,0.05043359994888306
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,64,1,4,255,0.04868319928646088
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,64,1,8,255,0.048449599742889406
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,64,1,16,255,0.04867999851703644
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,64,1,32,255,0.04853599965572357
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,64,1,64,255,0.04840160012245178
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,64,1,1,511,0.03972159922122955
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,64,1,2,511,0.030807998776435853
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,64,1,4,511,0.02529279887676239
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,64,1,8,511,0.02381120026111603
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,64,1,16,511,0.024676799774169922
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,64,1,32,511,0.025939199328422546
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,64,1,64,511,0.02598559856414795
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,64,1,1,511,0.09987199902534485
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,64,1,2,511,0.0922320008277893
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,64,1,4,511,0.08933600187301635
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,64,1,8,511,0.08748319745063782
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,64,1,16,511,0.08869919776916504
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,64,1,32,511,0.08996959924697875
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,64,1,64,511,0.08970239758491516
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,64,1,1,1023,0.05125439763069153
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,64,1,2,1023,0.042428800463676454
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,64,1,4,1023,0.038945600390434265
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,64,1,8,1023,0.03743839859962463
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,64,1,16,1023,0.03816159963607788
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,64,1,32,1023,0.038955199718475345
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,64,1,64,1023,0.03867039978504181
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,64,1,1,1023,0.16528799533843994
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,64,1,2,1023,0.15773279666900636
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,64,1,4,1023,0.15475679636001588
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,64,1,8,1023,0.15346399545669556
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,64,1,16,1023,0.15469759702682495
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,64,1,32,1023,0.15568959712982178
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,64,1,64,1023,0.1552448034286499
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,64,1,1,2047,0.06968960165977478
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,64,1,2,2047,0.06147199869155884
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,64,1,4,2047,0.056775999069213864
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,64,1,8,2047,0.05443199872970581
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,64,1,16,2047,0.05558879971504212
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,64,1,32,2047,0.057036799192428586
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,64,1,64,2047,0.05718880295753479
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,64,1,1,2047,0.29376161098480225
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,64,1,2,2047,0.2874543905258179
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,64,1,4,2047,0.28416640758514405
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,64,1,8,2047,0.28211679458618166
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,64,1,16,2047,0.28357601165771484
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,64,1,32,2047,0.2848992109298706
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,64,1,64,2047,0.2854464054107666
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,64,1,1,4095,0.10643839836120605
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,64,1,2,4095,0.09738720059394837
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,64,1,4,4095,0.0921616017818451
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,64,1,8,4095,0.08961600065231323
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,64,1,16,4095,0.09312000274658203
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,64,1,32,4095,0.09659199714660645
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,64,1,64,4095,0.09532639980316163
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,64,1,1,4095,0.5481984138488769
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,64,1,2,4095,0.5391280174255371
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,64,1,4,4095,0.5368239879608154
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,64,1,8,4095,0.53580322265625
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,64,1,16,4095,0.5386176109313965
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,64,1,32,4095,0.5395040035247802
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,64,1,64,4095,0.5410143852233886
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,64,1,1,8191,0.17538559436798096
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,64,1,2,8191,0.166212797164917
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,64,1,4,8191,0.16086239814758302
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,64,1,8,8191,0.15822720527648926
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,64,1,16,8191,0.16437280178070068
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,64,1,32,8191,0.16768959760665894
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,64,1,64,8191,0.1668704032897949
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,64,1,1,8191,1.046233558654785
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,64,1,2,8191,1.03929443359375
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,64,1,4,8191,1.0359919548034668
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,64,1,8,8191,1.0347711563110351
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,64,1,16,8191,1.0417903900146483
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,64,1,1,16383,0.31076319217681886
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,64,1,32,8191,1.0446720123291016
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,64,1,2,16383,0.29885120391845704
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,64,1,64,8191,1.0462719917297363
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,64,1,32,16383,0.30269598960876465
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,64,1,4,16383,0.29461119174957273
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,64,1,8,16383,0.2920703887939453
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,64,1,16,16383,0.2987679958343506
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,64,1,64,16383,0.3035952091217041
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,64,1,1,16383,2.0468704223632814
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,64,1,2,16383,2.042230415344238
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,64,1,4,16383,2.035817527770996
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,64,1,8,16383,2.0335535049438476
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,64,1,16,16383,2.042521667480469
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,64,1,32,16383,2.042803192138672
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,64,1,1,32767,0.5844816207885742
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,64,1,2,32767,0.5688432216644287
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,64,1,4,32767,0.5661839962005615
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,64,1,64,16383,2.0446495056152343
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,64,1,8,32767,0.5678336143493652
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,64,1,16,32767,0.5699888229370117
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,64,1,32,32767,0.5751440048217773
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,64,1,64,32767,0.5707168102264404
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,128,1,1,1,0.017239999771118165
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,128,1,2,1,0.01635199934244156
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,128,1,4,1,0.015695999562740325
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,64,1,1,32767,4.0468494415283205
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,64,1,2,32767,4.038100814819336
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,128,1,8,1,0.015758399665355683
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,64,1,8,32767,4.038584136962891
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,128,1,16,1,0.016075199842453
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,64,1,4,32767,4.034212875366211
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,128,1,32,1,0.015881599485874177
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,64,1,16,32767,4.054635238647461
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,64,1,32,32767,4.045635223388672
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,64,1,64,32767,4.042599868774414
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,128,1,64,1,0.01565759927034378
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,128,1,1,1,0.023795199394226075
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,128,1,2,1,0.022334399819374084
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,128,1,4,1,0.022299200296401978
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,128,1,8,1,0.022120000422000886
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,128,1,16,1,0.022784000635147093
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,128,1,32,1,0.022196799516677856
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,128,1,64,1,0.02205760031938553
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,128,1,1,3,0.017283199727535246
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,128,1,2,3,0.016102400422096253
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,128,1,4,3,0.01622239947319031
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,128,1,8,3,0.01611679941415787
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,128,1,16,3,0.016281600296497344
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,128,1,32,3,0.015779200196266174
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,128,1,64,3,0.016201600432395935
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,128,1,1,3,0.023563200235366823
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,128,1,2,3,0.022118400037288665
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,128,1,4,3,0.021934400498867034
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,128,1,8,3,0.022065599262714387
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,128,1,16,3,0.02202879935503006
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,128,1,32,3,0.02210880070924759
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,128,1,64,3,0.021859200298786165
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,128,1,1,7,0.017476800084114074
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,128,1,2,7,0.01613599956035614
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,128,1,4,7,0.015812799334526062
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,128,1,8,7,0.01571200042963028
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,128,1,16,7,0.015820799767971037
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,128,1,32,7,0.015612800419330598
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,128,1,64,7,0.016103999316692354
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,128,1,1,7,0.024120000004768372
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,128,1,2,7,0.022921599447727203
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,128,1,4,7,0.022536000609397887
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,128,1,8,7,0.022579200565814972
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,128,1,16,7,0.02277279943227768
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,128,1,32,7,0.02242400050163269
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,128,1,64,7,0.022331200540065765
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,128,1,1,15,0.017800000309944154
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,128,1,2,15,0.016286399960517884
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,128,1,4,15,0.016271999478340148
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,128,1,8,15,0.01626719981431961
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,128,1,16,15,0.015961599349975587
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,128,1,32,15,0.015747199952602386
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,128,1,64,15,0.01635040044784546
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,128,1,1,15,0.02625280022621155
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,128,1,1,31,0.017926399409770966
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,128,1,2,15,0.02497279942035675
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,128,1,4,15,0.024987199902534486
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,128,1,8,15,0.024875199794769286
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,128,1,16,15,0.024820800125598907
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,128,1,32,15,0.024596799910068513
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,128,1,64,15,0.024831999838352204
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,128,1,2,31,0.016228799521923066
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,128,1,2,31,0.02738400101661682
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,128,1,4,31,0.016323199868202208
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,128,1,8,31,0.016251200437545778
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,128,1,16,31,0.016310399770736693
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,128,1,32,31,0.01595360040664673
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,128,1,64,31,0.01621440052986145
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,128,1,1,31,0.028814399242401124
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,128,1,4,31,0.02747200131416321
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,128,1,8,31,0.02741599977016449
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,128,1,16,31,0.027555200457572936
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,128,1,32,31,0.027156800031661987
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,128,1,64,31,0.027396801114082336
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,128,1,1,63,0.018980799615383147
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,128,1,2,63,0.01682559996843338
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,128,1,4,63,0.01671999990940094
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,128,1,8,63,0.01652960032224655
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,128,1,16,63,0.016564799845218657
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,128,1,32,63,0.016310399770736693
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,128,1,64,63,0.01709440052509308
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,128,1,1,63,0.03677760064601898
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,128,1,2,63,0.033713600039482115
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,128,1,4,63,0.03399679958820343
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,128,1,8,63,0.03340319991111755
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,128,1,16,63,0.03362239897251129
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,128,1,32,63,0.033344000577926636
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,128,1,64,63,0.03369599878787995
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,128,1,1,127,0.02066880017518997
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,128,1,2,127,0.01880960017442703
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,128,1,4,127,0.018692800402641298
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,128,1,8,127,0.01855359971523285
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,128,1,16,127,0.01902880072593689
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,128,1,32,127,0.018303999304771425
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,128,1,64,127,0.019068799912929535
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,128,1,2,127,0.04953599870204926
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,128,1,1,127,0.055180799961090085
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,128,1,4,127,0.048283201456069944
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,128,1,8,127,0.04786239862442017
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,128,1,16,127,0.04764319956302643
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,128,1,8,255,0.02170239984989166
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,128,1,32,127,0.047092801332473753
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,128,1,64,127,0.04748800098896026
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,128,1,1,255,0.02808319926261902
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,128,1,2,255,0.02346239984035492
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,128,1,2,255,0.085835200548172
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,128,1,4,255,0.02255360037088394
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,128,1,16,255,0.02237280011177063
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,128,1,32,255,0.022129599750041962
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,128,1,64,255,0.021817600727081297
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,128,1,1,255,0.08903840184211731
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,128,1,4,255,0.08458560109138488
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,128,1,8,255,0.08446879982948304
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,128,1,16,255,0.08411359786987305
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,128,1,32,255,0.08391039967536926
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,128,1,64,255,0.0835103988647461
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,128,1,1,511,0.04336479902267456
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,128,1,2,511,0.03892639875411987
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,128,1,4,511,0.03755680024623871
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,128,1,8,511,0.03674559891223907
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,128,1,2,511,0.1556064009666443
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,128,1,16,511,0.03657119870185852
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,128,1,32,511,0.03639039993286133
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,128,1,64,511,0.036771199107170104
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,128,1,1,511,0.15909440517425538
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,128,1,4,511,0.154310405254364
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,128,1,8,511,0.1534111976623535
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,128,1,16,511,0.1532896041870117
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,128,1,32,511,0.15323359966278077
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,128,1,64,511,0.15302560329437256
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,128,1,1,1023,0.06155359745025635
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,128,1,2,1023,0.05730559825897217
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,128,1,4,1023,0.05517119765281677
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,128,1,8,1023,0.054199999570846556
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,128,1,16,1023,0.054632002115249635
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,128,1,32,1023,0.05374720096588135
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,128,1,64,1023,0.05373600125312805
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,128,1,1,1023,0.28884000778198243
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,128,1,2,1023,0.28571200370788574
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,128,1,4,1023,0.2833568096160889
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,128,1,8,1023,0.2818383932113647
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,128,1,16,1023,0.28274879455566404
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,128,1,32,1023,0.2829904079437256
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,128,1,64,1023,0.2820656061172485
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,128,1,1,2047,0.09804800152778625
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,128,1,2,2047,0.09344800114631653
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,128,1,4,2047,0.0910256028175354
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,128,1,8,2047,0.08954079747200012
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,128,1,16,2047,0.08931679725646972
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,128,1,2,2047,0.5360991954803467
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,128,1,32,2047,0.08947839736938476
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,128,1,64,2047,0.08954880237579346
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,128,1,1,2047,0.5402912139892578
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,128,1,4,2047,0.5359456062316894
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,128,1,8,2047,0.5351903915405274
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,128,1,16,2047,0.5347792148590088
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,128,1,32,2047,0.5337952136993408
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,128,1,64,2047,0.5339056015014648
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,128,1,1,4095,0.16659200191497803
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,128,1,2,4095,0.16122560501098632
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,128,1,4,4095,0.15904159545898439
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,128,1,8,4095,0.15722880363464356
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,128,1,16,4095,0.15702400207519532
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,128,1,32,4095,0.15732640027999878
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,128,1,64,4095,0.15652639865875245
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,128,1,1,4095,1.0404463768005372
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,128,1,2,4095,1.0366191864013672
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,128,1,4,4095,1.0357168197631836
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,128,1,8,4095,1.03570556640625
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,128,1,32,4095,1.035598373413086
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,128,1,2,8191,0.30267839431762694
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,128,1,16,4095,1.036201572418213
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,128,1,1,8191,0.30373120307922363
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,128,1,64,4095,1.0342911720275878
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,128,1,4,8191,0.2929167985916138
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,128,1,8,8191,0.290231990814209
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,128,1,16,8191,0.29230079650878904
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,128,1,32,8191,0.28982880115509035
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,128,1,64,8191,0.28983519077301023
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,128,1,1,8191,2.038324737548828
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,128,1,2,8191,2.0356639862060546
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,128,1,1,16383,0.580134391784668
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,128,1,4,8191,2.0334272384643555
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,128,1,8,8191,2.0329647064208984
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,128,1,16,8191,2.032718467712402
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,128,1,32,8191,2.0324159622192384
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,128,1,64,8191,2.031920051574707
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,128,1,2,16383,0.5655983924865723
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,128,1,4,16383,0.5601903915405273
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,128,1,8,16383,0.560475206375122
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,128,1,16,16383,0.5656032085418701
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,128,1,32,16383,0.5641071796417236
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,128,1,64,16383,0.5629583835601807
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,128,1,2,16383,4.034527969360352
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,128,1,4,16383,4.049172973632812
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,128,1,1,16383,4.035243225097656
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,128,1,8,16383,4.031619262695313
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,128,1,16,16383,4.0361793518066404
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,256,1,1,1,0.02619520127773285
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,128,1,32,16383,4.033500671386719
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,256,1,2,1,0.023772799968719484
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,128,1,64,16383,4.032083129882812
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,256,1,4,1,0.0235727995634079
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,256,1,8,1,0.0229312002658844
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,256,1,16,1,0.02284960001707077
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,256,1,32,1,0.02285120040178299
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,256,1,64,1,0.022945599257946016
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,256,1,1,1,0.03292160034179688
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,256,1,2,1,0.030238398909568788
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,256,1,64,1,0.02942720055580139
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,256,1,4,1,0.029820799827575684
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,256,1,8,1,0.029257598519325256
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,256,1,16,1,0.057259202003479004
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,256,1,32,1,0.028944000601768494
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,256,1,1,3,0.02587839961051941
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,256,1,64,3,0.022635200619697572
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,256,1,2,3,0.023817600309848787
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,256,1,4,3,0.023446400463581086
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,256,1,8,3,0.02295680046081543
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,256,1,16,3,0.023183999955654143
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,256,1,32,3,0.022673599421977997
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,256,1,1,3,0.03366400003433227
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,256,1,2,3,0.030695998668670656
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,256,1,4,3,0.030160000920295714
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,256,1,8,3,0.02990399897098541
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,256,1,16,3,0.030027198791503906
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,256,1,32,3,0.02945759892463684
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,256,1,64,3,0.02946079969406128
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,256,1,1,7,0.026056000590324403
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,256,1,2,7,0.0236175999045372
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,256,1,4,7,0.023364800214767455
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,256,1,8,7,0.02306720018386841
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,256,1,16,7,0.023203200101852416
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,256,1,32,7,0.02295999974012375
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,256,1,64,7,0.02281759977340698
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,256,1,1,7,0.03610720038414002
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,256,1,2,7,0.03256799876689911
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,256,1,4,7,0.03229759931564331
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,256,1,8,7,0.031865599751472476
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,256,1,16,7,0.03172639906406403
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,256,1,32,7,0.031857600808143614
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,256,1,64,7,0.03152799904346466
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,256,1,1,15,0.026180800795555115
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,256,1,2,15,0.024401600658893585
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,256,1,1,15,0.04058400094509125
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,256,1,4,15,0.023470400273799895
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,256,1,8,15,0.022676800191402436
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,256,1,16,15,0.023235200345516203
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,256,1,32,15,0.023094399273395537
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,256,1,64,15,0.022969600558280946
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,256,1,2,15,0.03553600013256073
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,256,1,4,15,0.0346560001373291
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,256,1,8,15,0.03469119966030121
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,256,1,16,15,0.03400320112705231
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,256,1,32,15,0.03425439894199371
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,256,1,64,15,0.03418880105018616
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,256,1,1,31,0.026870399713516235
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,256,1,2,31,0.02465600073337555
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,256,1,4,31,0.02332960069179535
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,256,1,8,31,0.023265600204467773
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,256,1,16,31,0.023204800486564637
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,256,1,32,31,0.022726400196552275
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,256,1,64,31,0.023071999847888946
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,256,1,1,31,0.04934079945087433
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,256,1,2,31,0.04266240000724793
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,256,1,4,31,0.04073759913444519
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,256,1,8,31,0.040375998616218566
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,256,1,16,31,0.04021919965744018
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,256,1,32,31,0.040462398529052736
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,256,1,64,31,0.03996959924697876
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,256,1,1,63,0.0335536003112793
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,256,1,2,63,0.025142401456832886
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,256,1,4,63,0.023827199637889863
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,256,1,8,63,0.02422720044851303
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,256,1,16,63,0.023761600255966187
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,256,1,32,63,0.023928000032901763
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,256,1,64,63,0.02344000041484833
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,256,1,1,63,0.0659056007862091
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,256,1,2,63,0.059113597869873045
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,256,1,4,63,0.0552672028541565
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,256,1,8,63,0.053671997785568235
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,256,1,16,63,0.05315679907798767
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,256,1,32,63,0.05322239995002746
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,256,1,8,127,0.028126400709152222
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,256,1,64,63,0.05231519937515259
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,256,1,1,127,0.03903360068798065
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,256,1,2,127,0.034185600280761716
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,256,1,4,127,0.02957119941711426
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,256,1,16,127,0.027620801329612733
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,256,1,32,127,0.027750399708747864
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,256,1,64,127,0.027798399329185486
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,256,1,1,127,0.0993776023387909
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,256,1,2,127,0.09529759883880615
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,256,1,4,127,0.0927519977092743
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,256,1,8,127,0.09150239825248718
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,256,1,16,127,0.09098880290985108
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,256,1,32,127,0.09048799872398376
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,256,1,64,127,0.09049280285835266
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,256,1,1,255,0.04822719991207123
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,256,1,2,255,0.04350399971008301
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,256,1,4,255,0.042192000150680545
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,256,1,8,255,0.041124799847602846
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,256,1,16,255,0.0401392012834549
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,256,1,32,255,0.040417599678039554
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,256,1,64,255,0.04000160098075867
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,256,1,1,255,0.16393760442733765
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,256,1,2,255,0.16024960279464723
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,256,1,4,255,0.15846879482269288
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,256,1,8,255,0.1574928045272827
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,256,1,16,255,0.15725599527359008
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,256,1,32,255,0.15662399530410767
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,256,1,8,511,0.061924797296524045
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,256,1,64,255,0.156385600566864
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,256,1,1,511,0.07546079754829407
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,256,1,2,511,0.06787199974060058
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,256,1,4,511,0.06399999856948853
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,256,1,32,511,0.06104159951210022
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,256,1,16,511,0.06113920211791992
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,256,1,64,511,0.0607151985168457
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,256,1,1,511,0.3031968116760254
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,256,1,2,511,0.295580792427063
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,256,1,4,511,0.2928416013717651
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,256,1,8,511,0.2904975891113281
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,256,1,16,511,0.28959360122680666
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,256,1,32,511,0.28954401016235354
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,256,1,64,511,0.2897615909576416
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,256,1,1,1023,0.110806405544281
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,256,1,2,1023,0.10239200592041016
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,256,1,4,1023,0.09860960245132447
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,256,1,8,1023,0.09620320200920104
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,256,1,16,1023,0.0956175982952118
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,256,1,32,1023,0.0953279972076416
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,256,1,64,1023,0.09500160217285156
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,256,1,1,1023,0.5550191879272461
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,256,1,2,1023,0.5465007781982422
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,256,1,4,1023,0.5432079792022705
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,256,1,8,1023,0.541377592086792
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,256,1,16,1023,0.5409279823303222
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,256,1,32,1023,0.5414656162261963
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,256,1,64,1023,0.541476821899414
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,256,1,1,2047,0.17930079698562623
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,256,1,2,2047,0.1695904016494751
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,256,1,4,2047,0.1663264036178589
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,256,1,8,2047,0.16283040046691893
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,256,1,16,2047,0.1629199981689453
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,256,1,32,2047,0.16197600364685058
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,256,1,64,2047,0.16212480068206786
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,256,1,1,2047,1.0535951614379884
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,256,1,2,2047,1.0458736419677734
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,256,1,4,2047,1.041659164428711
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,256,1,8,2047,1.0412240028381348
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,256,1,16,2047,1.0399328231811524
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,256,1,1,4095,0.3137808084487915
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,256,1,2,4095,0.31260321140289304
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,256,1,32,2047,1.039840030670166
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,256,1,4,4095,0.30076959133148196
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,256,1,64,2047,1.0403136253356933
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,256,1,8,4095,0.29726400375366213
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,256,1,16,4095,0.2957312107086182
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,256,1,32,4095,0.2972831964492798
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,256,1,64,4095,0.295959997177124
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,256,1,1,4095,2.0519231796264648
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,256,1,2,4095,2.043619155883789
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,256,1,4,4095,2.0419424057006834
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,256,1,8,4095,2.03940486907959
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,256,1,1,8191,0.5823088169097901
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,256,1,16,4095,2.037673568725586
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,256,1,32,4095,2.036903953552246
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,256,1,64,4095,2.0380880355834963
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,256,1,2,8191,0.6035264015197754
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,256,1,4,8191,0.5768703937530517
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,256,1,8,8191,0.5644032001495362
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,256,1,16,8191,0.5659359931945801
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,256,1,32,8191,0.5743824005126953
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,256,1,64,8191,0.5601247787475586
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,512,1,1,1,0.05068640112876892
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,512,1,2,1,0.04020479917526245
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,512,1,4,1,0.03848479986190796
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,512,1,8,1,0.03774240016937256
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,512,1,16,1,0.03746080100536346
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,256,1,1,8191,4.047401428222656
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,256,1,2,8191,4.0376945495605465
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,256,1,4,8191,4.038627243041992
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,256,1,8,8191,4.03331184387207
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,512,1,32,1,0.03775840103626251
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,256,1,16,8191,4.034864044189453
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,512,1,64,1,0.03726719915866852
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,256,1,32,8191,4.032479858398437
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,512,1,1,1,0.059571200609207155
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,512,1,2,1,0.048876801133155824
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,256,1,64,8191,4.032740783691406
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,512,1,4,1,0.04550879895687103
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,512,1,16,1,0.04423840045928955
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,512,1,8,1,0.0448415994644165
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,512,1,32,1,0.04437919855117798
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,512,1,64,1,0.04399999976158142
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,512,1,1,3,0.050526398420333865
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,512,1,2,3,0.04041439890861511
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,512,1,4,3,0.03852640092372894
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,512,1,8,3,0.03776159882545471
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,512,1,16,3,0.03758560121059418
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,512,1,32,3,0.0373775988817215
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,512,1,64,3,0.03758879899978638
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,512,1,1,3,0.06083199977874756
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,512,1,2,3,0.0507968008518219
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,512,1,4,3,0.04726400077342987
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,512,1,8,3,0.046409600973129274
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,512,1,16,3,0.046081599593162534
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,512,1,32,3,0.04604159891605377
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,512,1,64,3,0.04621120095252991
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,512,1,1,7,0.05080639719963074
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,512,1,2,7,0.04054400026798248
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,512,1,1,7,0.06467199921607972
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,512,1,4,7,0.03888320028781891
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,512,1,8,7,0.038020798563957216
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,512,1,16,7,0.03749760091304779
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,512,1,32,7,0.03742400109767914
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,512,1,64,7,0.03766719996929169
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,512,1,2,7,0.05429440140724182
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,512,1,4,7,0.05034400224685669
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,512,1,8,7,0.049188798666000365
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,512,1,16,7,0.04932959973812103
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,512,1,32,7,0.04889439940452576
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,512,1,64,7,0.048767998814582825
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,512,1,1,15,0.05130079984664917
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,512,1,2,15,0.040870401263237
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,512,1,4,15,0.038555198907852174
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,512,1,8,15,0.03792479932308197
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,512,1,16,15,0.03778879940509796
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,512,1,32,15,0.038043200969696045
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,512,1,64,15,0.03773120045661926
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,512,1,1,15,0.07158079743385315
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,512,1,2,15,0.06266559958457947
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,512,1,4,15,0.05718719959259033
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,512,1,8,15,0.05508319735527038
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,512,1,16,15,0.05475519895553589
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,512,1,32,15,0.05458719730377197
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,512,1,64,15,0.0547327995300293
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,512,1,1,31,0.05286080241203308
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,512,1,2,31,0.04323520064353943
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,512,1,4,31,0.038998401165008544
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,512,1,8,31,0.03830719888210297
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,512,1,16,31,0.037676799297332766
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,512,1,32,31,0.03782399892807007
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,512,1,64,31,0.037887999415397645
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,512,1,1,31,0.08630719780921936
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,512,1,2,31,0.07715680003166199
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,512,1,4,31,0.07353119850158692
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,512,1,8,31,0.06934080123901368
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,512,1,16,31,0.06799039840698243
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,512,1,32,31,0.06655840277671814
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,512,1,64,31,0.06696959733963012
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,512,1,1,63,0.05744640231132507
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,512,1,2,63,0.048870399594306946
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,512,1,4,63,0.04497439861297607
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,512,1,8,63,0.04171999990940094
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,512,1,16,63,0.03986720144748688
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,512,1,32,63,0.039323198795318606
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,512,1,64,63,0.039243200421333314
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,512,1,1,63,0.11917599439620971
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,512,1,2,63,0.11048799753189087
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,512,1,4,63,0.10691039562225342
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,512,1,8,63,0.10418239831924439
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,512,1,16,63,0.10238080024719239
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,512,1,32,63,0.10202879905700683
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,512,1,64,63,0.10180319547653198
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,512,1,1,127,0.06666240096092224
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,512,1,2,127,0.058590400218963626
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,512,1,4,127,0.055307197570800784
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,512,1,8,127,0.052902400493621826
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,512,1,16,127,0.052127999067306516
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,512,1,32,127,0.05091680288314819
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,512,1,64,127,0.050944000482559204
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,512,1,32,127,0.16726720333099365
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,512,1,1,127,0.18297439813613892
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,512,1,2,127,0.1751423954963684
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,512,1,4,127,0.17146400213241578
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,512,1,8,127,0.1689504027366638
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,512,1,16,127,0.16740479469299316
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,512,1,16,255,0.06984000205993653
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,512,1,64,127,0.1672127962112427
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,512,1,1,255,0.08413599729537964
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,512,1,2,255,0.07644479870796203
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,512,1,4,255,0.07284479737281799
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,512,1,8,255,0.07098079919815063
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,512,1,32,255,0.06856160163879395
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,512,1,64,255,0.06850720047950745
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,512,1,1,255,0.3084192037582397
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,512,1,2,255,0.3013263940811157
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,512,1,4,255,0.2978640079498291
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,512,1,8,255,0.29687039852142333
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,512,1,16,255,0.2959696054458618
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,512,1,32,255,0.2946367979049683
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,512,1,64,255,0.2948944091796875
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,512,1,1,511,0.13960800170898438
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,512,1,2,511,0.12444800138473511
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,512,1,4,511,0.1164512038230896
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,512,1,8,511,0.11152960062026977
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,512,1,16,511,0.11053600311279296
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,512,1,32,511,0.10999840497970581
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,512,1,64,511,0.10981600284576416
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,512,1,1,511,0.5830944061279297
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,512,1,2,511,0.5678671836853028
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,512,1,4,511,0.5605055809020996
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,512,1,8,511,0.5563792228698731
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,512,1,64,511,0.5542672157287598
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,512,1,16,511,0.5547808170318603
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,512,1,32,511,0.5533008098602294
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,512,1,1,1023,0.2082751989364624
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,512,1,2,1023,0.19171359539031982
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,512,1,4,1023,0.1836527943611145
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,512,1,8,1023,0.17988799810409545
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,512,1,16,1023,0.17813440561294555
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,512,1,32,1023,0.17674560546875
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,512,1,64,1023,0.1759727954864502
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,512,1,1,1023,1.0841039657592773
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,512,1,2,1023,1.0682687759399414
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,512,1,4,1023,1.0606032371520997
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,512,1,8,1023,1.0589296340942382
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,512,1,16,1023,1.0555487632751466
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,512,1,32,1023,1.0544560432434082
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,512,1,64,1023,1.054089641571045
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,512,1,1,2047,0.3461087942123413
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,512,1,2,2047,0.3254944086074829
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,512,1,32,2047,0.30851039886474607
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,512,1,4,2047,0.316758394241333
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,512,1,8,2047,0.31170880794525146
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,512,1,16,2047,0.31089599132537843
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,512,1,64,2047,0.3084304094314575
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,512,1,1,2047,2.0792736053466796
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,512,1,2,2047,2.065303993225098
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,512,1,8,2047,2.053019142150879
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,512,1,4,2047,2.056435203552246
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,512,1,1,4095,0.6196208000183105
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,512,1,16,2047,2.050089645385742
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,512,1,32,2047,2.0506959915161134
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,512,1,64,2047,2.0508880615234375
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,512,1,2,4095,0.6118159770965577
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,512,1,4,4095,0.5822559833526612
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,512,1,8,4095,0.5813488006591797
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,512,1,16,4095,0.5780272006988525
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,512,1,64,4095,0.5754608154296875
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,512,1,32,4095,0.584764814376831
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1024,1,1,1,0.08717280030250549
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1024,1,2,1,0.07252479791641235
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1024,1,4,1,0.06408320069313049
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1024,1,8,1,0.062191998958587645
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1024,1,16,1,0.06165760159492493
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1024,1,32,1,0.06156960129737854
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,1024,1,64,1,0.061368000507354734
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,512,1,1,4095,4.079528045654297
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1024,1,1,1,0.09805439710617066
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,512,1,2,4095,4.067299270629883
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,512,1,4,4095,4.054671859741211
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,512,1,8,4095,4.0486400604248045
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,512,1,16,4095,4.046913528442383
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1024,1,4,1,0.07618240118026734
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1024,1,2,1,0.08479520082473754
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,512,1,32,4095,4.046876907348633
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1024,1,8,1,0.07135040163993836
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1024,1,16,1,0.07054399847984313
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,512,1,64,4095,4.043827056884766
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1024,1,32,1,0.07050560116767883
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,1024,1,64,1,0.07017760276794434
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1024,1,32,3,0.061838400363922116
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1024,1,1,3,0.08730080127716064
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1024,1,2,3,0.07275360226631164
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1024,1,4,3,0.06483039855957032
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1024,1,8,3,0.06250879764556885
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1024,1,16,3,0.061624002456665036
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,1024,1,64,3,0.06133279800415039
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1024,1,1,3,0.10120160579681396
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1024,1,2,3,0.0872655987739563
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1024,1,4,3,0.08028159737586975
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1024,1,8,3,0.07461599707603454
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1024,1,16,3,0.07324320077896118
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1024,1,32,3,0.07385600209236146
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,1024,1,64,3,0.07297919988632202
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1024,1,1,7,0.0896511971950531
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1024,1,2,7,0.07269600033760071
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1024,1,4,7,0.06550080180168152
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1024,1,8,7,0.06171839833259583
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1024,1,16,7,0.062118399143219
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1024,1,32,7,0.06174719929695129
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,1024,1,64,7,0.06136639714241028
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1024,1,1,7,0.10950560569763183
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1024,1,2,7,0.09375200271606446
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1024,1,4,7,0.08817600011825562
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1024,1,8,7,0.08127840161323548
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1024,1,16,7,0.07981119751930237
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1024,1,32,7,0.0787551999092102
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,1024,1,64,7,0.07879520058631898
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1024,1,1,15,0.08964319825172425
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1024,1,2,15,0.07375839948654175
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1024,1,4,15,0.06595199704170226
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1024,1,8,15,0.06199520230293274
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1024,1,16,15,0.0625760018825531
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1024,1,32,15,0.061692798137664796
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,1024,1,64,15,0.06168320178985596
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1024,1,1,15,0.12526079416275024
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1024,1,2,15,0.10768959522247315
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1024,1,4,15,0.10210720300674439
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1024,1,8,15,0.097980797290802
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1024,1,16,15,0.09517120122909546
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1024,1,32,15,0.09273599982261657
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,1024,1,64,15,0.09217439889907837
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1024,1,1,31,0.0919215977191925
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1024,1,2,31,0.07535200119018555
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1024,1,4,31,0.06933439970016479
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1024,1,8,31,0.06423360109329224
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1024,1,16,31,0.06262239813804626
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1024,1,32,31,0.06187999844551086
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,1024,1,64,31,0.06150720119476318
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1024,1,1,31,0.15525120496749878
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,1024,1,64,31,0.12528159618377685
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1024,1,2,31,0.13789279460906984
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1024,1,4,31,0.1314703941345215
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1024,1,8,31,0.12791520357131958
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1024,1,16,31,0.12643359899520873
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1024,1,32,31,0.1257904052734375
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1024,1,1,63,0.09946560263633727
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1024,1,2,63,0.08125759959220887
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1024,1,4,63,0.07452639937400818
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1024,1,8,63,0.07170559763908387
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1024,1,16,63,0.07047839760780335
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1024,1,32,63,0.06987360119819641
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,1024,1,64,63,0.06908159852027893
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1024,1,1,63,0.21596479415893555
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1024,1,2,63,0.1979856014251709
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1024,1,4,63,0.1918928027153015
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1024,1,8,63,0.18808799982070923
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1024,1,16,63,0.18685120344161987
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1024,1,32,63,0.18643840551376342
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,1024,1,64,63,0.18543679714202882
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1024,1,1,127,0.11748960018157958
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1024,1,2,127,0.10007359981536865
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1024,1,4,127,0.0914784014225006
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1024,1,8,127,0.08767679929733277
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1024,1,16,127,0.08579360246658325
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1024,1,32,127,0.08574720025062561
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,1024,1,64,127,0.0855840027332306
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1024,1,1,127,0.3421823978424072
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1024,1,2,127,0.3255199909210205
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1024,1,4,127,0.31825759410858157
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1024,1,8,127,0.3124768018722534
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1024,1,16,127,0.31105918884277345
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1024,1,32,127,0.30974879264831545
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,1024,1,64,127,0.30921599864959715
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1024,1,1,255,0.14916479587554932
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1024,1,2,255,0.13403680324554443
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1024,1,4,255,0.12748639583587645
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1024,1,8,255,0.12339040040969848
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1024,1,16,255,0.12106399536132813
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1024,1,32,255,0.12011040449142456
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,1024,1,64,255,0.11866079568862915
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1024,1,1,255,0.5903247833251953
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1024,1,2,255,0.5759664058685303
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1024,1,8,255,0.5648608207702637
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1024,1,4,255,0.5687664031982422
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1024,1,16,255,0.5630991935729981
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1024,1,32,255,0.5622255802154541
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,1024,1,64,255,0.561348819732666
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1024,1,1,511,0.2546832084655762
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1024,1,2,511,0.2249392032623291
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1024,1,4,511,0.20938560962677003
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1024,1,8,511,0.2008687973022461
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1024,1,16,511,0.1990880012512207
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1024,1,32,511,0.19800959825515746
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,1024,1,64,511,0.1969488024711609
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1024,1,1,511,1.131822395324707
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1024,1,2,511,1.099513626098633
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1024,1,4,511,1.083631992340088
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1024,1,8,511,1.0763584136962892
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1024,1,16,511,1.076244831085205
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1024,1,32,511,1.0737327575683593
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1024,1,1,1023,0.39110560417175294
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,1024,1,64,511,1.075052833557129
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1024,1,4,1023,0.3444288015365601
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1024,1,2,1023,0.36047680377960206
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1024,1,8,1023,0.33682880401611326
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1024,1,16,1023,0.33352320194244384
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1024,1,32,1023,0.3318160057067871
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,1024,1,64,1023,0.3293488025665283
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1024,1,2,1023,2.099684715270996
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1024,1,4,1023,2.0845008850097657
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1024,1,1,1023,2.1286880493164064
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1024,1,8,1023,2.076371192932129
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1024,1,16,1023,2.0734016418457033
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1024,1,32,1023,2.0731456756591795
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,1024,1,64,1023,2.070804786682129
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1024,1,1,2047,0.6858992099761962
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1024,1,2,2047,0.6415679931640625
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1024,1,4,2047,0.6131360054016113
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1024,1,8,2047,0.6047647953033447
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1024,1,16,2047,0.597047996520996
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1024,1,32,2047,0.599019193649292
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,1,1024,1,64,2047,0.5947855949401856
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1024,1,1,2047,4.134246444702148
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1024,1,2,2047,4.096760177612305
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,1,1,1,1,0.013259199261665345
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1024,1,4,2047,4.081124877929687
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1,1,2,1,0.012907199561595917
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1,1,4,1,0.012534399330615998
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1024,1,8,2047,4.076265716552735
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1,1,16,1,0.012111999839544297
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1,1,8,1,0.01223680004477501
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1024,1,16,2047,4.067216110229492
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,1,1024,1,64,2047,4.063888168334961
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1,1,8,1,0.018062399327754976
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1024,1,32,2047,4.07115364074707
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1,1,32,1,0.012187200039625168
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1,1,16,1,0.01786399930715561
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1,1,64,1,0.012223999947309494
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1,1,2,1,0.018768000602722167
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,1,1,1,1,0.018855999410152435
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1,1,4,1,0.018459199368953703
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1,1,32,1,0.01815039962530136
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1,1,64,1,0.01815840005874634
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1,1,2,3,0.012915199995040894
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,1,1,1,3,0.013382400572299957
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1,1,4,3,0.012520000338554382
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1,1,8,3,0.012305600196123123
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1,1,16,3,0.01212640032172203
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1,1,32,3,0.012067200243473053
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1,1,64,3,0.012164799869060517
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,1,1,1,3,0.019249600172042847
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1,1,2,3,0.0189423993229866
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1,1,4,3,0.01855680048465729
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1,1,8,3,0.018329599499702455
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1,1,16,3,0.017880000174045563
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1,1,32,3,0.017844800651073457
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1,1,64,3,0.018001599609851836
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,1,1,1,7,0.013342399895191193
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1,1,2,7,0.012873600423336028
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1,1,4,7,0.012486399710178375
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1,1,8,7,0.012135999649763108
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1,1,16,7,0.012055999785661697
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1,1,32,7,0.012137600034475327
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1,1,64,7,0.012132800370454788
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,1,1,1,7,0.019176000356674196
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1,1,2,7,0.018747200071811677
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1,1,4,7,0.018302400410175324
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1,1,8,7,0.018187199532985688
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1,1,16,7,0.018136000633239745
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1,1,32,7,0.01817920058965683
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1,1,64,7,0.017897599935531618
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,1,1,1,15,0.013276800513267517
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1,1,2,15,0.012984000146389008
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1,1,4,15,0.012451200187206269
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1,1,8,15,0.012225600332021714
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1,1,16,15,0.012176000326871873
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1,1,32,15,0.012129600346088409
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1,1,64,15,0.012265600264072418
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,1,1,1,15,0.019350400567054747
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1,1,2,15,0.018729600310325622
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1,1,4,15,0.018606400489807128
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1,1,8,15,0.01788319945335388
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1,1,16,15,0.01807200014591217
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1,1,32,15,0.018033599853515624
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1,1,64,15,0.018302400410175324
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1,1,2,31,0.01300320029258728
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,1,1,1,31,0.01329759955406189
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1,1,4,31,0.012465599924325943
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1,1,8,31,0.012359999865293504
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1,1,16,31,0.011987199634313583
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1,1,32,31,0.012120000272989272
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1,1,64,31,0.012222400307655335
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,1,1,1,31,0.019337600469589232
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1,1,2,31,0.018721599876880646
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1,1,4,31,0.018537600338459016
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1,1,8,31,0.01793919950723648
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1,1,16,31,0.01817599982023239
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1,1,32,31,0.018087999522686006
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1,1,64,31,0.018007999658584593
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,1,1,1,63,0.013147200644016265
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1,1,2,63,0.012943999469280243
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1,1,4,63,0.012379200011491776
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1,1,8,63,0.012425599992275238
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1,1,16,63,0.012104000151157378
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1,1,32,63,0.012223999947309494
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1,1,64,63,0.012116800248622894
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,1,1,1,63,0.019072000682353974
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1,1,2,63,0.018900799751281738
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1,1,4,63,0.018699200451374055
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1,1,8,63,0.01822720021009445
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1,1,16,63,0.018211199343204497
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1,1,32,63,0.017985600233078002
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1,1,64,63,0.01815200001001358
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,1,1,1,127,0.014664000272750855
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1,1,2,127,0.014825600385665893
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1,1,4,127,0.01438080072402954
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1,1,8,127,0.014103999733924866
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1,1,16,127,0.013939200341701508
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1,1,32,127,0.013896000385284425
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1,1,64,127,0.014000000059604644
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,1,1,1,127,0.020446400344371795
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1,1,2,127,0.02067199945449829
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1,1,4,127,0.020478400588035583
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1,1,8,127,0.020015999674797058
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1,1,16,127,0.020134399831295013
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1,1,32,127,0.019623999297618867
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1,1,64,127,0.019843199849128725
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,1,1,1,255,0.017811200022697447
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1,1,2,255,0.01791359931230545
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1,1,4,255,0.01765120029449463
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1,1,8,255,0.017057600617408752
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1,1,8,255,0.023395200073719025
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1,1,16,255,0.01704320013523102
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1,1,32,255,0.01713919937610626
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1,1,64,255,0.01716800034046173
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,1,1,1,255,0.05097439885139465
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1,1,2,255,0.023680000007152556
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1,1,4,255,0.023284800350666046
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1,1,16,255,0.023056000471115112
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1,1,32,255,0.023192000389099122
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1,1,64,255,0.023071999847888946
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,1,1,1,511,0.019124799966812135
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1,1,2,511,0.018904000520706177
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1,1,4,511,0.017564800381660462
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1,1,8,511,0.016977599263191222
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1,1,16,511,0.016683200001716615
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1,1,32,511,0.01763039976358414
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1,1,64,511,0.01785600036382675
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,1,1,1,511,0.02560800015926361
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1,1,2,511,0.02486719936132431
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1,1,4,511,0.023984000086784363
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1,1,8,511,0.02314079999923706
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1,1,16,511,0.022852799296379088
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1,1,32,511,0.023928000032901763
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1,1,64,511,0.024217599630355836
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,1,1,1,1023,0.020001600682735442
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1,1,2,1023,0.01897439956665039
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1,1,4,1023,0.017961600422859193
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1,1,2,1023,0.025811201333999632
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1,1,8,1023,0.01740639954805374
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1,1,16,1023,0.016808000206947327
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1,1,32,1023,0.017745600640773775
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1,1,64,1023,0.01802240014076233
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,1,1,1,1023,0.02653760015964508
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1,1,4,1023,0.024966399371623992
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1,1,8,1023,0.023745599389076232
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1,1,16,1023,0.023470400273799895
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1,1,32,1023,0.024345600605010988
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1,1,64,1023,0.024481600522994994
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,1,1,1,2047,0.02138720005750656
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1,1,2,2047,0.020046399533748628
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1,1,4,2047,0.018606400489807128
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1,1,8,2047,0.018083199858665466
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1,1,16,2047,0.01770080029964447
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1,1,32,2047,0.018004800379276275
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1,1,64,2047,0.01797119975090027
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,1,1,1,2047,0.029495999217033386
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1,1,2,2047,0.028729599714279175
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1,1,4,2047,0.027209600806236266
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1,1,8,2047,0.026516801118850707
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1,1,16,2047,0.026631999015808105
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1,1,32,2047,0.026715201139450074
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1,1,64,2047,0.026774400472640993
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,1,1,1,4095,0.023240000009536743
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1,1,2,4095,0.022337600588798523
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1,1,4,4095,0.0204927995800972
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1,1,8,4095,0.019950400292873382
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1,1,16,4095,0.019307200610637665
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1,1,32,4095,0.020278400182723998
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1,1,64,4095,0.02075839936733246
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,1,1,1,4095,0.0343311995267868
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1,1,2,4095,0.03326080143451691
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1,1,4,4095,0.03189440071582794
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1,1,8,4095,0.0314191997051239
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1,1,16,4095,0.030806401371955873
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1,1,32,4095,0.03165760040283203
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1,1,64,4095,0.03206880092620849
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,1,1,1,8191,0.02611039876937866
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1,1,2,8191,0.02481600046157837
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1,1,4,8191,0.02344000041484833
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1,1,8,8191,0.02223680019378662
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1,1,16,8191,0.0218639999628067
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1,1,32,8191,0.02386080026626587
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1,1,64,8191,0.02378080040216446
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,1,1,1,8191,0.04426240026950836
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1,1,2,8191,0.042473599314689636
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1,1,4,8191,0.04116159975528717
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1,1,8,8191,0.03989120125770569
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1,1,16,8191,0.039241600036621097
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1,1,32,8191,0.041022399067878725
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1,1,64,8191,0.041280001401901245
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,1,1,1,16383,0.030326399207115173
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1,1,2,16383,0.028718400001525878
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1,1,4,16383,0.027289599180221558
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1,1,8,16383,0.02582559883594513
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1,1,16,16383,0.025206398963928223
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1,1,32,16383,0.02563520073890686
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1,1,64,16383,0.02597759962081909
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,1,1,1,16383,0.06487039923667907
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1,1,2,16383,0.06420320272445679
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1,1,4,16383,0.05819839835166931
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1,1,8,16383,0.05542880296707153
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1,1,16,16383,0.05533120036125183
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1,1,32,16383,0.05469920039176941
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1,1,64,16383,0.055516797304153445
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,1,1,1,32767,0.03973920047283173
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1,1,2,32767,0.03716639876365661
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1,1,4,32767,0.03184320032596588
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1,1,8,32767,0.030417600274086
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1,1,16,32767,0.02993600070476532
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1,1,32,32767,0.02972320020198822
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1,1,64,32767,0.03007520139217377
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,1,1,1,32767,0.10089279413223266
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1,1,2,32767,0.09790560007095336
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1,1,4,32767,0.09604960083961486
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1,1,8,32767,0.09483519792556763
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1,1,16,32767,0.09410880208015442
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1,1,32,32767,0.09328960180282593
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1,1,64,32767,0.09428640007972718
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,1,1,1,65535,0.05427039861679077
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1,1,2,65535,0.04784800112247467
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1,1,4,65535,0.045495998859405515
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1,1,8,65535,0.04423039853572845
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1,1,16,65535,0.04319359958171844
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1,1,32,65535,0.042929598689079286
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1,1,64,65535,0.04332320094108581
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,1,1,1,65535,0.17106239795684813
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1,1,2,65535,0.16384479999542237
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1,1,4,65535,0.1619328022003174
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1,1,8,65535,0.16089600324630737
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1,1,16,65535,0.16092640161514282
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1,1,8,131071,0.06123359799385071
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1,1,32,65535,0.16035360097885132
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1,1,64,65535,0.16042239665985109
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,1,1,1,131071,0.08551200032234192
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1,1,2,131071,0.06748639941215515
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1,1,4,131071,0.06359040141105651
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1,1,16,131071,0.06093760132789612
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1,1,8,131071,0.2883152008056641
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1,1,32,131071,0.0608784019947052
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1,1,64,131071,0.06040319800376892
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,1,1,1,131071,0.30908639430999757
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1,1,2,131071,0.2930880069732666
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1,1,4,131071,0.28975040912628175
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1,1,16,131071,0.28858559131622313
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1,1,32,131071,0.2885040044784546
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1,1,64,131071,0.2878607988357544
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,2,1,1,1,0.013502399623394012
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,2,1,2,1,0.01292479932308197
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,2,1,4,1,0.012363199889659882
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,2,1,8,1,0.012121599912643433
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,2,1,16,1,0.012120000272989272
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,2,1,32,1,0.012145599722862244
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,2,1,64,1,0.01215519979596138
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,2,1,1,1,0.019094400107860565
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,2,1,2,1,0.018648000061511995
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,2,1,4,1,0.018067200481891633
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,2,1,8,1,0.01804320067167282
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,2,1,16,1,0.01770240068435669
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,2,1,32,1,0.017684799432754517
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,2,1,64,1,0.017745600640773775
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,2,1,1,3,0.013491199910640716
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,2,1,2,3,0.012958399951457977
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,2,1,4,3,0.012507200241088867
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,2,1,8,3,0.012249600142240524
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,2,1,16,3,0.012124799937009812
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,2,1,32,3,0.012188799679279327
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,2,1,64,3,0.012116800248622894
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,2,1,1,3,0.019092799723148347
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,2,1,2,3,0.018467199802398682
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,2,1,4,3,0.018172800540924072
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,2,1,8,3,0.017998400330543517
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,2,1,16,3,0.017790399491786957
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,2,1,32,3,0.017748799920082093
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,2,1,64,3,0.017766399681568144
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,2,1,1,7,0.013441599905490875
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,2,1,2,7,0.012775999307632447
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,2,1,4,7,0.012534399330615998
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,2,1,8,7,0.012118399888277055
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,2,1,16,7,0.0121568001806736
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,2,1,32,7,0.012108799815177918
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,2,1,64,7,0.012124799937009812
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,2,1,1,7,0.018907199800014495
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,2,1,2,7,0.018566399812698364
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,2,1,4,7,0.01786559969186783
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,2,1,8,7,0.017587199807167053
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,2,1,16,7,0.017791999876499175
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,2,1,32,7,0.01783200055360794
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,2,1,64,7,0.017684799432754517
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,2,1,1,15,0.013376000523567199
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,2,1,2,15,0.012831999361515046
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,2,1,4,15,0.012561599910259246
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,2,1,8,15,0.012303999811410903
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,2,1,16,15,0.012219200283288956
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,2,1,32,15,0.012206400185823441
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,2,1,64,15,0.012124799937009812
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,2,1,1,15,0.01915999948978424
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,2,1,2,15,0.01886080056428909
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,2,1,4,15,0.018323199450969697
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,2,1,8,15,0.017903999984264375
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,2,1,16,15,0.0179967999458313
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,2,1,32,15,0.01786080002784729
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,2,1,64,15,0.017847999930381775
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,2,1,1,31,0.013500800728797913
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,2,1,2,31,0.01290079951286316
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,2,1,1,31,0.01903039962053299
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,2,1,4,31,0.012572799623012543
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,2,1,8,31,0.012169600278139115
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,2,1,16,31,0.012198399752378464
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,2,1,32,31,0.012249600142240524
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,2,1,64,31,0.012201599776744843
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,2,1,2,31,0.018723200261592864
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,2,1,4,31,0.01815840005874634
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,2,1,8,31,0.017948800325393678
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,2,1,16,31,0.017679999768733978
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,2,1,32,31,0.0176816001534462
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,2,1,64,31,0.017990399897098542
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,2,1,1,63,0.013332800567150116
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,2,1,2,63,0.012884800136089326
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,2,1,4,63,0.012585599720478059
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,2,1,8,63,0.012359999865293504
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,2,1,16,63,0.012398400157690049
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,2,1,32,63,0.012278400361537933
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,2,1,64,63,0.012177599966526032
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,2,1,1,63,0.019099199771881105
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,2,1,2,63,0.018931199610233308
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,2,1,4,63,0.01815200001001358
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,2,1,8,63,0.017950400710105896
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,2,1,16,63,0.018116800487041472
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,2,1,32,63,0.017927999794483184
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,2,1,64,63,0.017936000227928163
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,2,1,1,127,0.01501920074224472
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,2,1,2,127,0.014769600331783294
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,2,1,4,127,0.014168000221252442
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,2,1,8,127,0.014032000303268432
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,2,1,16,127,0.013872000575065612
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,2,1,32,127,0.014006400108337402
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,2,1,32,127,0.019569599628448488
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,2,1,64,127,0.014006400108337402
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,2,1,1,127,0.02077440023422241
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,2,1,2,127,0.020260800421237946
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,2,1,4,127,0.019964799284934998
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,2,1,8,127,0.01987839937210083
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,2,1,16,255,0.017105600237846373
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,2,1,16,127,0.01963520050048828
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,2,1,64,127,0.019566400349140166
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,2,1,1,255,0.017995199561119078
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,2,1,2,255,0.017905600368976593
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,2,1,4,255,0.01736160069704056
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,2,1,8,255,0.017217600345611574
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,2,1,32,255,0.017131200432777403
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,2,1,64,255,0.017092800140380858
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,2,1,1,255,0.02381120026111603
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,2,1,2,255,0.02369920015335083
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,2,1,2,511,0.018755200505256652
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,2,1,4,255,0.023156799376010895
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,2,1,8,255,0.023035199940204622
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,2,1,16,255,0.022878399491310118
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,2,1,32,255,0.02287999987602234
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,2,1,64,255,0.022947199642658234
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,2,1,1,511,0.019673599302768706
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,2,1,4,511,0.01788319945335388
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,2,1,8,511,0.016977599263191222
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,2,1,16,511,0.016715200245380403
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,2,1,32,511,0.017723199725151063
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,2,1,64,511,0.017977599799633027
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,2,1,1,511,0.026254400610923767
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,2,1,2,511,0.02573440074920654
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,2,1,4,511,0.024140800535678863
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,2,1,8,511,0.023470400273799895
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,2,1,16,511,0.0232464000582695
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,2,1,32,511,0.02425280064344406
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,2,1,64,511,0.024820800125598907
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,2,1,1,1023,0.02040479928255081
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,2,1,2,1023,0.019424000382423402
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,2,1,4,1023,0.018188799917697906
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,2,1,8,1023,0.017535999417304993
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,2,1,16,1023,0.01727519929409027
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,2,1,32,1023,0.01796479970216751
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,2,1,64,1023,0.01804320067167282
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,2,1,1,1023,0.029123198986053467
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,2,1,2,1023,0.028166401386260986
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,2,1,4,1023,0.026804798841476442
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,2,1,8,1023,0.02587040066719055
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,2,1,16,1023,0.02529279887676239
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,2,1,32,1023,0.026284798979759216
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,2,1,64,1023,0.02645280063152313
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,2,1,1,2047,0.021435199677944182
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,2,1,2,2047,0.0204815998673439
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,2,1,4,2047,0.01926559954881668
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,2,1,8,2047,0.018441599607467652
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,2,1,16,2047,0.01818079948425293
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,2,1,32,2047,0.018203200399875642
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,2,1,64,2047,0.01849119961261749
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,2,1,1,2047,0.0337007999420166
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,2,1,2,2047,0.0321040004491806
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,2,1,4,2047,0.03054719865322113
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,2,1,8,2047,0.029886400699615477
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,2,1,16,2047,0.02945759892463684
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,2,1,32,2047,0.029550400376319886
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,2,1,64,2047,0.029838401079177856
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,2,1,1,4095,0.02470400035381317
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,2,1,32,4095,0.020295999944210052
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,2,1,2,4095,0.022294400632381438
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,2,1,2,4095,0.04014880061149597
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,2,1,4,4095,0.03839839994907379
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,2,1,4,4095,0.020681600272655486
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,2,1,8,4095,0.020510399341583253
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,2,1,16,4095,0.019683200120925903
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,2,1,64,4095,0.020448000729084016
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,2,1,1,4095,0.04233759939670563
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,2,1,8,4095,0.03743839859962463
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,2,1,16,4095,0.03670400083065033
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,2,1,32,4095,0.037724798917770384
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,2,1,64,4095,0.03776639997959137
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,2,1,1,8191,0.029016000032424927
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,2,1,2,8191,0.026265600323677064
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,2,1,4,8191,0.023446400463581086
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,2,1,8,8191,0.022864000499248506
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,2,1,16,8191,0.02221439927816391
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,2,1,32,8191,0.02493920028209686
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,2,1,64,8191,0.025492799282073975
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,2,1,1,8191,0.06365280151367188
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,2,1,2,8191,0.06186079978942871
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,2,1,4,8191,0.0541055977344513
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,2,1,8,8191,0.052420800924301146
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,2,1,16,8191,0.05157439708709717
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,2,1,32,8191,0.05445600152015686
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,2,1,64,8191,0.05541920065879822
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,2,1,1,16383,0.03845439851284027
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,2,1,2,16383,0.03360480070114136
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,2,1,4,16383,0.02902719974517822
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,2,1,2,16383,0.09499520063400269
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,2,1,8,16383,0.027531200647354127
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,2,1,16,16383,0.026819199323654175
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,2,1,32,16383,0.028646400570869444
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,2,1,64,16383,0.029142400622367857
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,2,1,1,16383,0.09936000108718872
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,2,1,4,16383,0.09205600023269653
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,2,1,8,16383,0.09122560024261475
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,2,1,16,16383,0.09016159772872925
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,2,1,32,16383,0.0927951991558075
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,2,1,64,16383,0.09271360039710999
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,2,1,1,32767,0.053513598442077634
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,2,1,2,32767,0.04525760114192963
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,2,1,4,32767,0.04116320013999939
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,2,1,8,32767,0.03970080018043518
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,2,1,16,32767,0.038996800780296326
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,2,1,32,32767,0.04050880074501038
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,2,1,64,32767,0.04115520119667053
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,2,1,1,32767,0.16940159797668458
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,2,1,2,32767,0.1600208044052124
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,2,1,4,32767,0.157151997089386
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,2,1,8,32767,0.1565999984741211
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,2,1,16,32767,0.15629440546035767
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,2,1,32,32767,0.15801600217819214
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,2,1,64,32767,0.15837759971618653
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,2,1,1,65535,0.08263199925422668
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,2,1,2,65535,0.06345279812812805
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,2,1,4,65535,0.05896639823913574
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,2,1,8,65535,0.0575007975101471
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,2,1,16,65535,0.056251198053359985
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,2,1,32,65535,0.058143997192382814
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,2,1,64,65535,0.05884479880332947
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,2,1,1,65535,0.30658879280090334
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,2,1,2,65535,0.28916640281677247
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,2,1,4,65535,0.2860912084579468
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,2,1,8,65535,0.2849519968032837
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,2,1,16,65535,0.2848543882369995
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,2,1,32,65535,0.2859839916229248
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,2,1,64,65535,0.28739840984344484
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,2,1,1,131071,0.13870240449905397
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,2,1,2,131071,0.0996720016002655
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,2,1,4,131071,0.09614880084991455
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,2,1,8,131071,0.09362879991531373
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,2,1,16,131071,0.09212960004806518
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,2,1,32,131071,0.09440000057220459
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,2,1,64,131071,0.09471679925918579
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,2,1,1,131071,0.5794032096862793
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,2,1,2,131071,0.5419424057006836
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,2,1,4,131071,0.5387567996978759
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,2,1,8,131071,0.5377952098846436
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,2,1,16,131071,0.536684799194336
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,4,1,1,1,0.013449600338935852
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,2,1,32,131071,0.5387584209442139
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,4,1,2,1,0.013041600584983826
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,2,1,64,131071,0.5385007858276367
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,4,1,4,1,0.012630400061607362
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,4,1,8,1,0.012300799787044524
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,4,1,16,1,0.012275200337171555
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,4,1,32,1,0.01228799968957901
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,4,1,64,1,0.012352000176906585
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,4,1,1,1,0.019390399754047393
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,4,1,2,1,0.01881600022315979
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,4,1,4,1,0.01844480037689209
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,4,1,8,1,0.018185600638389587
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,4,1,16,1,0.018094399571418764
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,4,1,32,1,0.018134400248527527
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,4,1,64,1,0.01820639967918396
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,4,1,1,3,0.013369600474834441
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,4,1,2,3,0.013020800054073333
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,4,1,4,3,0.01257600039243698
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,4,1,8,3,0.012411200255155564
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,4,1,16,3,0.012135999649763108
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,4,1,32,3,0.012256000190973282
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,4,1,64,3,0.01228479966521263
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,4,1,1,3,0.01929599940776825
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,4,1,2,3,0.018731200695037843
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,4,1,4,3,0.01855680048465729
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,4,1,8,3,0.018136000633239745
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,4,1,16,3,0.0181536003947258
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,4,1,32,3,0.018089599907398224
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,4,1,64,3,0.01828639954328537
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,4,1,1,7,0.013441599905490875
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,4,1,2,7,0.013100799918174744
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,4,1,4,7,0.01242239996790886
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,4,1,8,7,0.012444800138473511
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,4,1,16,7,0.012223999947309494
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,4,1,32,7,0.012328000366687774
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,4,1,64,7,0.012334399670362473
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,4,1,1,7,0.019403199851512908
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,4,1,2,7,0.018881599605083465
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,4,1,4,7,0.018505600094795228
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,4,1,8,7,0.018163199722766876
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,4,1,16,7,0.018140800297260284
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,4,1,32,7,0.018031999468803406
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,4,1,64,7,0.017975999414920805
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,4,1,1,15,0.01345440000295639
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,4,1,2,15,0.013099199533462525
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,4,1,4,15,0.012604799866676331
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,4,1,8,15,0.012591999769210816
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,4,1,16,15,0.01234079971909523
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,4,1,32,15,0.012252800166606903
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,4,1,64,15,0.012204799801111221
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,4,1,1,15,0.019166399538517
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,4,1,2,15,0.019017599523067474
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,4,1,4,15,0.018513600528240203
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,4,1,8,15,0.018406400084495546
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,4,1,16,15,0.018198400735855103
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,4,1,32,15,0.018292799592018127
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,4,1,64,15,0.018187199532985688
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,4,1,1,31,0.013460800051689148
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,4,1,2,31,0.01313440054655075
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,4,1,4,31,0.012595200538635254
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,4,1,8,31,0.01228799968957901
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,4,1,16,31,0.012399999797344208
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,4,1,32,31,0.012387199699878693
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,4,1,64,31,0.012196800112724305
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,4,1,1,31,0.019415999948978423
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,4,1,2,31,0.01887039989233017
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,4,1,4,31,0.018580800294876097
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,4,1,8,31,0.018216000497341157
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,4,1,16,31,0.01815039962530136
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,4,1,32,31,0.018223999440670012
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,4,1,64,31,0.018195199966430663
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,4,1,1,63,0.01342719942331314
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,4,1,2,63,0.013286399841308593
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,4,1,4,63,0.012612800300121307
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,4,1,8,63,0.01249760016798973
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,4,1,16,63,0.012267199903726577
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,4,1,32,63,0.01242400035262108
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,4,1,64,63,0.012297599762678146
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,4,1,1,63,0.019411200284957887
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,4,1,2,63,0.018960000574588777
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,4,1,4,63,0.018718400597572328
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,4,1,8,63,0.01844319999217987
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,4,1,16,63,0.018267199397087097
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,4,1,16,127,0.014153599739074707
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,4,1,32,63,0.018199999630451203
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,4,1,64,63,0.018249599635601042
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,4,1,1,127,0.015068799257278442
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,4,1,2,127,0.02102559953927994
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,4,1,2,127,0.0150736004114151
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,4,1,4,127,0.014345599710941315
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,4,1,8,127,0.014289599657058717
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,4,1,32,127,0.01420000046491623
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,4,1,64,127,0.013937599956989288
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,4,1,1,127,0.020983999967575072
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,4,1,4,127,0.020491200685501098
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,4,1,8,127,0.02022079974412918
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,4,1,16,127,0.020094400644302367
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,4,1,32,127,0.02005600035190582
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,4,1,64,127,0.02028000056743622
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,4,1,1,255,0.01809599995613098
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,4,1,2,255,0.018007999658584593
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,4,1,4,255,0.01737920045852661
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,4,1,8,255,0.017449599504470826
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,4,1,16,255,0.017392000555992125
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,4,1,32,255,0.01730400025844574
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,4,1,64,255,0.017000000178813934
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,4,1,1,255,0.02460319995880127
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,4,1,2,255,0.024267199635505676
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,4,1,4,255,0.0238864004611969
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,4,1,8,255,0.023824000358581544
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,4,1,16,255,0.023740799725055696
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,4,1,32,255,0.023689599335193635
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,4,1,64,255,0.023614400625228883
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,4,1,1,511,0.020326399803161622
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,4,1,2,511,0.019543999433517457
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,4,1,4,511,0.018041600286960603
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,4,1,8,511,0.01749120056629181
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,4,1,16,511,0.017239999771118165
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,4,1,32,511,0.018297599256038667
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,4,1,64,511,0.01833280026912689
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,4,1,1,511,0.029047998785972595
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,4,1,2,511,0.027928000688552855
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,4,1,4,511,0.027031999826431275
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,4,1,8,511,0.025995200872421263
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,4,1,16,511,0.025696000456809996
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,4,1,32,511,0.026358398795127868
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,4,1,64,511,0.026948800683021544
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,4,1,1,1023,0.021910400688648225
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,4,1,2,1023,0.019868800044059755
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,4,1,4,1023,0.018479999899864197
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,4,1,8,1023,0.017926399409770966
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,4,1,16,1023,0.017313599586486816
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,4,1,32,1023,0.0182559996843338
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,4,1,64,1023,0.018324799835681915
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,4,1,1,1023,0.03360480070114136
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,4,1,2,1023,0.031385600566864014
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,4,1,4,1023,0.030041599273681642
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,4,1,8,1023,0.028969600796699524
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,4,1,16,1023,0.028763198852539064
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,4,1,32,1023,0.0295632004737854
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,4,1,64,1023,0.029790401458740234
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,4,1,1,2047,0.024086399376392363
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,4,1,2,2047,0.02240640074014664
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,4,1,4,2047,0.019785599410533906
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,4,1,8,2047,0.019201600551605226
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,4,1,16,2047,0.018751999735832213
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,4,1,32,2047,0.018940800428390504
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,4,1,64,2047,0.01886560022830963
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,4,1,1,2047,0.04206559956073761
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,4,1,2,2047,0.03967519998550415
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,4,1,4,2047,0.03747360110282898
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,4,1,8,2047,0.03639520108699799
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,4,1,16,2047,0.03626399934291839
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,4,1,32,2047,0.03627040088176727
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,4,1,64,2047,0.036374399065971376
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,4,1,1,4095,0.028942400217056276
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,4,1,2,4095,0.02479359954595566
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,4,1,4,4095,0.02226399928331375
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,4,1,8,4095,0.021539199352264404
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,4,1,16,4095,0.02114879935979843
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,4,1,32,4095,0.02245440036058426
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,4,1,64,4095,0.022681599855422972
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,4,1,1,4095,0.06368160247802734
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,4,1,2,4095,0.059913599491119386
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,4,1,4,4095,0.05310720205307007
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,4,1,8,4095,0.05389279723167419
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,4,1,16,4095,0.05084959864616394
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,4,1,32,4095,0.05184800028800964
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,4,1,64,4095,0.052160000801086424
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,4,1,1,8191,0.038332799077034
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,4,1,2,8191,0.03250400125980377
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,4,1,4,8191,0.02720319926738739
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,4,1,8,8191,0.02539680004119873
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,4,1,16,8191,0.02481600046157837
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,4,1,32,8191,0.029523199796676634
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,4,1,64,8191,0.029769599437713623
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,4,1,1,8191,0.09922080039978028
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,4,1,2,8191,0.09364479780197144
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,4,1,4,8191,0.09040639996528625
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,4,1,8,8191,0.08985599875450134
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,4,1,16,8191,0.08895679712295532
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,4,1,32,8191,0.09281759858131408
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,4,1,64,8191,0.093859201669693
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,4,1,1,16383,0.053311997652053834
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,4,1,2,16383,0.043808001279830935
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,4,1,4,16383,0.03981119990348816
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,4,1,8,16383,0.03835200071334839
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,4,1,16,16383,0.03764159977436066
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,4,1,32,16383,0.04043200016021729
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,4,1,64,16383,0.041319999098777774
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,4,1,1,16383,0.16975680589675904
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,4,1,2,16383,0.1582208037376404
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,4,1,4,16383,0.15570080280303955
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,4,1,2,32767,0.06154239773750305
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,4,1,8,16383,0.15479520559310914
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,4,1,16,16383,0.15393439531326295
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,4,1,32,16383,0.1578879952430725
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,4,1,64,16383,0.15860960483551026
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,4,1,1,32767,0.081768000125885
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,4,1,4,32767,0.05864800214767456
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,4,1,8,32767,0.056062400341033936
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,4,1,16,32767,0.05518239736557007
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,4,1,32,32767,0.058508801460266116
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,4,1,64,32767,0.05916320085525513
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,4,1,1,32767,0.30603039264678955
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,4,1,2,32767,0.287609601020813
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,4,1,4,32767,0.2851151943206787
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,4,1,8,32767,0.28395519256591795
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,4,1,16,32767,0.28414719104766845
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,4,1,32,32767,0.2864255905151367
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,4,1,64,32767,0.2869071960449219
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,4,1,1,65535,0.14042240381240845
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,4,1,2,65535,0.09804319739341735
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,4,1,4,65535,0.09320160150527954
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,4,1,8,65535,0.09153439998626708
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,4,1,16,65535,0.08976160287857056
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,4,1,32,65535,0.09374560117721557
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,4,1,64,65535,0.0935808002948761
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,4,1,1,65535,0.5806096076965332
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,4,1,2,65535,0.5387904167175293
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,4,1,4,65535,0.5369184017181396
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,4,1,8,65535,0.5357183933258056
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,4,1,16,65535,0.5343440055847168
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,4,1,32,65535,0.5369056224822998
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,4,1,64,65535,0.537718391418457
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,4,1,1,131071,0.2530400037765503
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,4,1,2,131071,0.16836320161819457
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,4,1,4,131071,0.1635696053504944
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,4,1,8,131071,0.16027040481567384
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,4,1,16,131071,0.15920000076293944
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,4,1,32,131071,0.1613808035850525
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,4,1,64,131071,0.1628224015235901
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,4,1,1,131071,1.1219743728637694
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,4,1,2,131071,1.040719985961914
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,4,1,4,131071,1.037558364868164
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,8,1,1,1,0.014416000247001648
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,4,1,8,131071,1.0359375953674317
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,8,1,2,1,0.01311199963092804
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,4,1,16,131071,1.0362064361572265
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,8,1,4,1,0.012755200266838074
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,4,1,32,131071,1.0385120391845704
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,4,1,64,131071,1.0386719703674316
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,8,1,8,1,0.01242239996790886
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,8,1,16,1,0.012368000298738479
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,8,1,32,1,0.012455999851226807
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,8,1,1,1,0.019633600115776063
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,8,1,64,1,0.012337599694728852
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,8,1,2,1,0.019288000464439393
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,8,1,4,1,0.01886879950761795
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,8,1,8,1,0.01879359930753708
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,8,1,16,1,0.01839199960231781
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,8,1,32,1,0.018408000469207764
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,8,1,64,1,0.018372799456119537
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,8,1,1,3,0.013468800485134125
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,8,1,2,3,0.013099199533462525
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,8,1,2,3,0.019324800372123717
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,8,1,4,3,0.012825599312782288
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,8,1,8,3,0.012479999661445617
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,8,1,16,3,0.012270399928092956
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,8,1,32,3,0.012385600060224534
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,8,1,64,3,0.012428800016641617
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,8,1,1,3,0.019556799530982973
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,8,1,4,3,0.01889919936656952
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,8,1,8,3,0.018619200587272643
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,8,1,16,3,0.018503999710083006
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,8,1,32,3,0.01852799952030182
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,8,1,64,3,0.01868640035390854
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,8,1,1,7,0.013521599769592284
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,8,1,2,7,0.012960000336170197
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,8,1,4,7,0.012734399735927581
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,8,1,8,7,0.012454400211572647
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,8,1,16,7,0.012342400103807449
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,8,1,32,7,0.012401600182056428
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,8,1,64,7,0.012355200201272964
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,8,1,1,7,0.019515199959278105
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,8,1,2,7,0.019023999571800232
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,8,1,4,7,0.01875839978456497
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,8,1,8,7,0.01858240067958832
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,8,1,16,7,0.018513600528240203
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,8,1,32,7,0.01849440038204193
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,8,1,64,7,0.01841759979724884
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,8,1,1,15,0.013518400490283966
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,8,1,2,15,0.013096000254154205
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,8,1,4,15,0.012692800164222718
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,8,1,8,15,0.012467200309038163
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,8,1,16,15,0.012368000298738479
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,8,1,32,15,0.012481600046157837
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,8,1,64,15,0.012516799569129943
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,8,1,1,15,0.01971199959516525
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,8,1,2,15,0.019174399971961974
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,8,1,4,15,0.018777599930763243
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,8,1,8,15,0.018587200343608855
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,8,1,16,15,0.018369600176811218
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,8,1,32,15,0.018459199368953703
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,8,1,64,15,0.018639999628067016
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,8,1,1,31,0.013686400651931763
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,8,1,2,31,0.013096000254154205
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,8,1,4,31,0.012761600315570831
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,8,1,8,31,0.012470400333404541
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,8,1,16,31,0.012436799705028534
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,8,1,32,31,0.012251199781894683
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,8,1,64,31,0.012457600235939026
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,8,1,1,31,0.019966399669647215
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,8,1,2,31,0.019329600036144257
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,8,1,4,31,0.01880960017442703
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,8,1,8,31,0.018529599905014037
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,8,1,16,31,0.01855199933052063
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,8,1,32,31,0.018534399569034576
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,8,1,64,31,0.01844480037689209
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,8,1,1,63,0.013604800403118133
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,8,1,2,63,0.013352000713348388
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,8,1,4,63,0.013012799620628356
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,8,1,8,63,0.012518399953842163
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,8,1,16,63,0.012540799379348756
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,8,1,32,63,0.012358400225639343
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,8,1,64,63,0.012432000041007996
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,8,1,1,63,0.020206399261951447
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,8,1,2,63,0.01977279931306839
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,8,1,4,63,0.019139200448989868
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,8,1,8,63,0.01884640008211136
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,8,1,16,63,0.01881760060787201
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,8,1,32,63,0.018782399594783783
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,8,1,64,63,0.01875839978456497
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,8,1,1,127,0.01523520052433014
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,8,1,2,127,0.015011200308799743
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,8,1,4,127,0.014484800398349762
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,8,1,8,127,0.014251199364662171
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,8,1,16,127,0.014235199987888336
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,8,1,32,127,0.014108799397945404
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,8,1,64,127,0.014246399700641631
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,8,1,1,127,0.022148799896240235
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,8,1,2,127,0.02162559926509857
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,8,1,4,127,0.021236799657344818
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,8,1,8,127,0.021044799685478212
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,8,1,16,127,0.020868800580501556
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,8,1,32,127,0.02093279957771301
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,8,1,64,127,0.020703999698162077
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,8,1,1,255,0.019470399618148802
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,8,1,2,255,0.018052799999713896
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,8,1,4,255,0.017716799676418305
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,8,1,8,255,0.017552000284194947
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,8,1,16,255,0.017310400307178498
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,8,1,32,255,0.017313599586486816
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,8,1,64,255,0.017417599260807038
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,8,1,1,255,0.02670240104198456
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,8,1,2,255,0.026548799872398377
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,8,1,4,255,0.026155200600624085
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,8,1,8,255,0.02582559883594513
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,8,1,16,255,0.025628799200057985
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,8,1,32,255,0.02570880055427551
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,8,1,64,255,0.025462400913238526
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,8,1,1,511,0.02202879935503006
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,8,1,2,511,0.019976000487804412
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,8,1,4,511,0.0186831995844841
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,8,1,8,511,0.017817600071430205
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,8,1,16,511,0.017472000420093538
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,8,1,32,511,0.018459199368953703
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,8,1,64,511,0.01855359971523285
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,8,1,1,511,0.03355840146541596
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,8,1,2,511,0.03160960078239441
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,8,1,4,511,0.030272001028060914
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,8,1,8,511,0.02935360074043274
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,8,1,16,511,0.029126399755477907
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,8,1,32,511,0.03017280101776123
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,8,1,64,511,0.03028320074081421
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,8,1,1,1023,0.02433599978685379
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,8,1,2,1023,0.02163040041923523
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,8,1,4,1023,0.019566400349140166
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,8,1,8,1023,0.018833599984645844
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,8,1,16,1023,0.01836320012807846
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,8,1,32,1023,0.01887200027704239
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,8,1,64,1023,0.019308799505233766
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,8,1,1,1023,0.043372800946235655
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,8,1,2,1023,0.039396798610687254
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,8,1,4,1023,0.03704800009727478
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,8,1,8,1023,0.03614239990711212
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,8,1,16,1023,0.035918399691581726
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,8,1,32,1023,0.03670240044593811
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,8,1,64,1023,0.036847999691963194
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,8,1,1,2047,0.029158401489257812
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,8,1,2,2047,0.024979199469089507
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,8,1,4,2047,0.021404799818992615
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,8,1,8,2047,0.020751999318599702
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,8,1,16,2047,0.020398400723934174
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,8,1,32,2047,0.020828799903392793
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,8,1,64,2047,0.02078399956226349
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,8,1,1,2047,0.06454240083694458
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,8,1,2,2047,0.06038399934768677
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,8,1,4,2047,0.05290399789810181
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,8,1,8,2047,0.052534401416778564
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,8,1,16,2047,0.052035200595855716
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,8,1,32,2047,0.05071840286254883
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,8,1,64,2047,0.05095679759979248
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,8,1,1,4095,0.03855839967727661
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,8,1,2,4095,0.032688000798225404
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,8,1,4,4095,0.026713600754737853
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,8,1,8,4095,0.025356799364089966
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,8,1,16,4095,0.024984000623226164
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,8,1,32,4095,0.02582719922065735
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,8,1,64,4095,0.026524800062179565
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,8,1,1,4095,0.09856160283088684
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,8,1,2,4095,0.09333279728889465
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,8,1,4,4095,0.08988639712333679
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,8,1,8,4095,0.08885279893875123
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,8,1,16,4095,0.08914560079574585
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,8,1,32,4095,0.09047679901123047
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,8,1,64,4095,0.09025760293006897
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,8,1,1,8191,0.0522816002368927
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,8,1,2,8191,0.04359680116176605
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,8,1,4,8191,0.039366400241851805
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,8,1,8,8191,0.03707360029220581
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,8,1,16,8191,0.03630560040473938
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,8,1,32,8191,0.04069760143756866
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,8,1,64,8191,0.04129759967327118
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,8,1,16,8191,0.15362399816513062
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,8,1,1,8191,0.16926720142364501
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,8,1,2,8191,0.15843839645385743
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,8,1,4,8191,0.15601439476013185
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,8,1,8,8191,0.1541792035102844
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,8,1,32,8191,0.1583024024963379
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,8,1,64,8191,0.15858080387115478
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,8,1,1,16383,0.07849599719047547
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,8,1,2,16383,0.061692798137664796
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,8,1,4,16383,0.05780799984931946
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,8,1,8,16383,0.05550720095634461
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,8,1,16,16383,0.05418400168418884
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,8,1,32,16383,0.05861759781837463
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,8,1,64,16383,0.05882880091667175
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,8,1,1,16383,0.3038367986679077
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,8,1,2,16383,0.2873215913772583
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,8,1,4,16383,0.28480958938598633
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,8,1,8,16383,0.28304479122161863
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,8,1,16,16383,0.2825648069381714
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,8,1,32,16383,0.28670239448547363
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,8,1,64,16383,0.28730719089508056
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,8,1,1,32767,0.1329632043838501
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,8,1,2,32767,0.0979744017124176
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,8,1,4,32767,0.09324799776077271
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,8,1,8,32767,0.09020959734916686
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,8,1,16,32767,0.09044319987297059
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,8,1,32,32767,0.09411519765853882
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,8,1,64,32767,0.09485759735107421
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,8,1,1,32767,0.5743567943572998
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,8,1,2,32767,0.5407360076904297
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,8,1,4,32767,0.5363823890686035
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,8,1,1,65535,0.2425328016281128
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,8,1,8,32767,0.5360559940338134
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,8,1,16,32767,0.5350624084472656
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,8,1,64,32767,0.5389567852020264
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,8,1,32,32767,0.5379151821136474
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,8,1,2,65535,0.16954079866409302
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,8,1,4,65535,0.16223360300064088
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,8,1,8,65535,0.15907039642333984
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,8,1,16,65535,0.15714559555053711
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,8,1,32,65535,0.161844801902771
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,8,1,64,65535,0.16400159597396852
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,8,1,1,65535,1.1082528114318848
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,8,1,2,65535,1.0402015686035155
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,8,1,4,65535,1.0362704277038575
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,8,1,8,65535,1.034921646118164
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,8,1,16,65535,1.0352767944335937
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,8,1,1,131071,0.4669407844543457
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,8,1,32,65535,1.0383567810058594
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,8,1,2,131071,0.3074512004852295
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,8,1,64,65535,1.0399248123168945
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,8,1,4,131071,0.30045280456542967
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,8,1,8,131071,0.2980175971984863
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,8,1,16,131071,0.294867205619812
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,8,1,32,131071,0.2983520030975342
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,8,1,64,131071,0.30145440101623533
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,16,1,1,1,0.015030400454998016
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,16,1,2,1,0.01358720064163208
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,8,1,1,131071,2.18546085357666
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,8,1,2,131071,2.041868782043457
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,16,1,4,1,0.013076800107955932
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,8,1,4,131071,2.037286376953125
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,8,1,8,131071,2.0361488342285154
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,16,1,8,1,0.01284320056438446
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,8,1,16,131071,2.036689567565918
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,8,1,32,131071,2.0385536193847655
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,16,1,16,1,0.012604799866676331
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,16,1,32,1,0.012598399817943574
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,8,1,64,131071,2.040456008911133
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,16,1,64,1,0.012513600289821625
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,16,1,1,1,0.020627200603485107
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,16,1,2,1,0.019215999543666838
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,16,1,4,1,0.018777599930763243
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,16,1,8,1,0.018585599958896637
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,16,1,16,1,0.018320000171661376
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,16,1,32,1,0.018409599363803864
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,16,1,64,1,0.018262399733066557
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,16,1,1,3,0.015118399262428283
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,16,1,2,3,0.013302400708198547
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,16,1,4,3,0.013079999387264252
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,16,1,8,3,0.01268800050020218
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,16,1,16,3,0.012508800625801087
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,16,1,32,3,0.01242400035262108
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,16,1,64,3,0.012750400602817536
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,16,1,1,3,0.02067520022392273
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,16,1,2,3,0.019406400620937347
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,16,1,4,3,0.01873439997434616
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,16,1,8,3,0.01842560023069382
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,16,1,16,3,0.018555200099945067
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,16,1,32,3,0.018193599581718446
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,16,1,64,3,0.018302400410175324
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,16,1,1,7,0.014679999649524688
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,16,1,2,7,0.013385599851608277
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,16,1,4,7,0.012988799810409546
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,16,1,8,7,0.012726399302482604
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,16,1,16,7,0.012441600114107132
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,16,1,32,7,0.012468799948692322
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,16,1,64,7,0.012782399356365205
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,16,1,1,7,0.02066880017518997
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,16,1,2,7,0.01937599927186966
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,16,1,4,7,0.01866399943828583
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,16,1,8,7,0.018433600664138794
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,16,1,16,7,0.018276800215244294
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,16,1,32,7,0.018411199748516082
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,16,1,64,7,0.018607999384403228
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,16,1,1,15,0.015321600437164306
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,16,1,2,15,0.013566400110721587
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,16,1,4,15,0.013023999333381654
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,16,1,8,15,0.012665599584579468
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,16,1,16,15,0.012435200065374375
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,16,1,32,15,0.012606400251388549
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,16,1,64,15,0.012868799269199371
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,16,1,1,15,0.020606400072574617
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,16,1,2,15,0.019492800533771514
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,16,1,4,15,0.018849599361419677
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,16,1,8,15,0.018464000523090364
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,16,1,16,15,0.01846559941768646
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,16,1,32,15,0.01852799952030182
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,16,1,64,15,0.018606400489807128
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,16,1,1,31,0.015108799934387207
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,16,1,2,31,0.014281600713729858
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,16,1,4,31,0.013022400438785553
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,16,1,8,31,0.012775999307632447
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,16,1,16,31,0.012614400684833526
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,16,1,32,31,0.01281760036945343
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,16,1,64,31,0.012751999497413635
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,16,1,1,31,0.02091359943151474
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,16,1,2,31,0.019683200120925903
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,16,1,4,31,0.01927199959754944
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,16,1,8,31,0.01886720061302185
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,16,1,16,31,0.018964800238609313
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,16,1,32,31,0.018614399433135986
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,16,1,64,31,0.018603199720382692
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,16,1,1,63,0.015065599977970124
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,16,1,2,63,0.01472959965467453
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,16,1,4,63,0.013027200102806091
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,16,1,8,63,0.01282079964876175
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,16,1,16,63,0.01281919926404953
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,16,1,32,63,0.013155199587345123
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,16,1,64,63,0.013105599582195282
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,16,1,1,63,0.02152319997549057
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,16,1,2,63,0.021198399364948273
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,16,1,4,63,0.019964799284934998
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,16,1,8,63,0.019121600687503813
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,16,1,16,63,0.01932159960269928
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,16,1,32,63,0.019411200284957887
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,16,1,64,63,0.01905120015144348
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,16,1,1,127,0.01656160056591034
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,16,1,2,127,0.01621599942445755
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,16,1,2,127,0.023489600419998168
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,16,1,4,127,0.015209600329399109
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,16,1,8,127,0.014532800018787383
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,16,1,16,127,0.014425599575042724
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,16,1,32,127,0.014345599710941315
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,16,1,64,127,0.014375999569892883
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,16,1,1,127,0.024396799504756927
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,16,1,4,127,0.02290239930152893
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,16,1,8,127,0.022867199778556824
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,16,1,16,127,0.022776000201702118
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,16,1,32,127,0.02279199957847595
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,16,1,64,127,0.022707200050354003
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,16,1,1,255,0.019678400456905366
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,16,1,2,255,0.018731200695037843
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,16,1,4,255,0.017844800651073457
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,16,1,8,255,0.017862400412559508
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,16,1,16,255,0.01746399998664856
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,16,1,32,255,0.017960000038146972
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,16,1,64,255,0.017529599368572235
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,16,1,1,255,0.030611199140548707
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,16,1,2,255,0.02948319911956787
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,16,1,4,255,0.029047998785972595
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,16,1,8,255,0.028860801458358766
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,16,1,16,255,0.02871519923210144
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,16,1,32,255,0.02873600125312805
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,16,1,64,255,0.028624001145362853
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,16,1,1,511,0.026708799600601196
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,16,1,2,511,0.02282399982213974
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,16,1,4,511,0.019446399807929993
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,16,1,8,511,0.01881919950246811
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,16,1,16,511,0.017977599799633027
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,16,1,32,511,0.019395199418067933
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,16,1,64,511,0.01937279999256134
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,16,1,1,511,0.04425759911537171
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,16,1,2,511,0.0399071991443634
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,16,1,4,511,0.03715200126171112
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,16,1,8,511,0.03576320111751556
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,16,1,16,511,0.035545599460601804
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,16,1,32,511,0.03656159937381744
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,16,1,64,511,0.03668160140514374
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,16,1,1,1023,0.03162400126457214
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,16,1,2,1023,0.02563199996948242
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,16,1,4,1023,0.02160319983959198
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,16,1,8,1023,0.02051360011100769
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,16,1,16,1023,0.02011519968509674
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,16,1,32,1023,0.020948800444602966
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,16,1,64,1023,0.02112639993429184
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,16,1,1,1023,0.06606720089912414
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,16,1,2,1023,0.05945919752120972
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,16,1,4,1023,0.05575839877128601
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,16,1,8,1023,0.05158079862594604
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,16,1,16,1023,0.04983200132846832
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,16,1,32,1023,0.050318402051925656
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,16,1,64,1023,0.050543999671936034
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,16,1,1,2047,0.04072799980640411
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,16,1,2,2047,0.03311040103435516
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,16,1,4,2047,0.02717919945716858
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,16,1,8,2047,0.024600000679492952
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,16,1,16,2047,0.02341119945049286
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,16,1,4,2047,0.09002559781074523
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,16,1,32,2047,0.024635200202465058
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,16,1,64,2047,0.02462559938430786
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,16,1,1,2047,0.10042400360107422
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,16,1,64,2047,0.08890079855918884
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,16,1,2,2047,0.09376479983329773
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,16,1,8,2047,0.08809279799461364
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,16,1,16,2047,0.08774560093879699
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,16,1,32,2047,0.08890720009803772
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,16,1,1,4095,0.05395680069923401
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,16,1,2,4095,0.044833600521087646
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,16,1,4,4095,0.03875040113925934
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,16,1,8,4095,0.03707199990749359
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,16,1,16,4095,0.03567520081996918
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,16,1,32,4095,0.037622401118278505
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,16,1,64,4095,0.038315200805664064
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,16,1,1,4095,0.17049599885940553
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,16,1,64,4095,0.15505759716033934
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,16,1,2,4095,0.1587455987930298
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,16,1,4,4095,0.15440319776535033
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,16,1,8,4095,0.15283679962158203
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,16,1,16,4095,0.15200480222702026
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,16,1,32,4095,0.1544543981552124
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,16,1,1,8191,0.08079360127449035
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,16,1,2,8191,0.06345279812812805
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,16,1,4,8191,0.05726400017738342
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,16,1,8,8191,0.0550495982170105
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,16,1,16,8191,0.05373600125312805
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,16,1,32,8191,0.0582256019115448
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,16,1,64,8191,0.05925760269165039
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,16,1,16,8191,0.2818383932113647
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,16,1,1,8191,0.3053839921951294
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,16,1,2,8191,0.28796958923339844
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,16,1,4,8191,0.28377280235290525
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,16,1,8,8191,0.282475209236145
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,16,1,32,8191,0.2857487916946411
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,16,1,64,8191,0.2871295928955078
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,16,1,1,16383,0.1380288004875183
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,16,1,2,16383,0.09822880029678345
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,16,1,4,16383,0.09231200218200683
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,16,1,8,16383,0.0907151997089386
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,16,1,16,16383,0.08867200016975403
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,16,1,32,16383,0.09439520239830017
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,16,1,64,16383,0.09530879855155945
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,16,1,1,16383,0.5766784191131592
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,16,1,2,16383,0.5416975975036621
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,16,1,4,16383,0.5362959861755371
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,16,1,8,16383,0.534446382522583
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,16,1,16,16383,0.5339263916015625
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,16,1,32,16383,0.5387343883514404
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,16,1,16,32767,0.15580480098724364
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,16,1,64,16383,0.5395487785339356
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,16,1,1,32767,0.24608321189880372
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,16,1,2,32767,0.16698880195617677
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,16,1,4,32767,0.16110719442367555
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,16,1,8,32767,0.15886080265045166
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,16,1,32,32767,0.16139039993286133
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,16,1,64,32767,0.1621600031852722
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,16,1,1,32767,1.1121312141418458
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,16,1,2,32767,1.0410127639770508
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,16,1,4,32767,1.0369152069091796
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,16,1,8,32767,1.0343647956848145
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,16,1,16,32767,1.0362591743469238
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,16,1,32,32767,1.0392191886901856
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,16,1,1,65535,0.46849918365478516
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,16,1,64,32767,1.0404704093933106
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,16,1,2,65535,0.30185279846191404
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,16,1,4,65535,0.2978912115097046
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,16,1,8,65535,0.29315838813781736
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,16,1,16,65535,0.2912928104400635
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,16,1,32,65535,0.29544479846954347
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,16,1,64,65535,0.2961872100830078
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,16,1,1,65535,2.189468765258789
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,16,1,2,65535,2.038470458984375
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,16,1,4,65535,2.034543991088867
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,16,1,1,131071,0.9161919593811035
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,16,1,8,65535,2.0353872299194338
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,16,1,16,65535,2.0314783096313476
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,16,1,32,65535,2.036643218994141
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,16,1,64,65535,2.0382848739624024
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,16,1,2,131071,0.5870880126953125
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,16,1,4,131071,0.5684879779815674
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,16,1,16,131071,0.5625264167785644
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,16,1,8,131071,0.559939193725586
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,16,1,32,131071,0.5736256122589112
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,16,1,64,131071,0.5681215763092041
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,16,1,2,131071,4.059163284301758
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,16,1,1,131071,4.345102310180664
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,16,1,4,131071,4.050703811645508
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,16,1,8,131071,4.056025695800781
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,16,1,16,131071,4.031158447265625
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,32,1,1,1,0.01573439985513687
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,16,1,32,131071,4.036495971679687
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,32,1,16,1,0.013424000144004822
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,32,1,2,1,0.01475680023431778
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,16,1,64,131071,4.055535888671875
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,32,1,4,1,0.013699199259281158
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,32,1,8,1,0.013704000413417817
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,32,1,32,1,0.013208000361919403
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,32,1,64,1,0.013435199856758118
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,32,1,1,1,0.02150239944458008
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,32,1,2,1,0.020531199872493744
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,32,1,4,1,0.019409599900245666
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,32,1,8,1,0.01947679966688156
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,32,1,16,1,0.019356800615787505
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,32,1,32,1,0.019385600090026857
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,32,1,64,1,0.019364799559116363
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,32,1,1,3,0.01579679995775223
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,32,1,2,3,0.014654399454593658
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,32,1,4,3,0.013784000277519226
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,32,1,8,3,0.013369600474834441
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,32,1,16,3,0.013324800133705138
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,32,1,32,3,0.013307200372219085
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,32,1,64,3,0.01342719942331314
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,32,1,1,3,0.021620799601078034
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,32,1,2,3,0.021012799441814424
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,32,1,4,3,0.020132799446582795
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,32,1,8,3,0.019657599925994872
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,32,1,16,3,0.019395199418067933
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,32,1,32,3,0.019662399590015412
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,32,1,64,3,0.019118399918079378
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,32,1,1,7,0.015902400016784668
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,32,1,2,7,0.014843200147151948
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,32,1,4,7,0.013676799833774567
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,32,1,8,7,0.013897599279880523
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,32,1,16,7,0.01342719942331314
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,32,1,32,7,0.013275200128555298
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,32,1,64,7,0.013206399977207184
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,32,1,1,7,0.021716800332069398
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,32,1,2,7,0.02112320065498352
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,32,1,4,7,0.019827200472354888
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,32,1,8,7,0.019755199551582336
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,32,1,16,7,0.0195375993847847
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,32,1,32,7,0.019411200284957887
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,32,1,64,7,0.01942880004644394
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,32,1,1,15,0.01547040045261383
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,32,1,2,15,0.01481119990348816
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,32,1,4,15,0.01422239989042282
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,32,1,8,15,0.013486400246620178
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,32,1,8,15,0.01968960016965866
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,32,1,16,15,0.013515199720859527
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,32,1,32,15,0.013500800728797913
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,32,1,64,15,0.013631999492645264
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,32,1,1,15,0.0221328005194664
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,32,1,2,15,0.020744000375270844
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,32,1,4,15,0.019918400049209594
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,32,1,16,15,0.019499200582504272
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,32,1,32,15,0.019336000084877014
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,32,1,64,15,0.0198512002825737
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,32,1,1,31,0.015539200603961944
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,32,1,2,31,0.014923200011253357
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,32,1,4,31,0.01387840062379837
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,32,1,8,31,0.013628800213336945
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,32,1,16,31,0.013414399325847625
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,32,1,32,31,0.013622400164604188
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,32,1,64,31,0.0133200004696846
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,32,1,1,31,0.021796800196170807
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,32,1,2,31,0.02152799963951111
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,32,1,4,31,0.020252799987792967
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,32,1,8,31,0.020216000080108643
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,32,1,16,31,0.01979999989271164
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,32,1,32,31,0.01992959976196289
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,32,1,32,63,0.013806399703025819
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,32,1,64,31,0.020103999972343446
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,32,1,1,63,0.015428799390792846
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,32,1,2,63,0.016017599403858183
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,32,1,4,63,0.014353600144386292
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,32,1,8,63,0.014035199582576752
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,32,1,16,63,0.01388159990310669
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,32,1,64,63,0.013956800103187561
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,32,1,1,63,0.023710399866104126
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,32,1,2,63,0.023379200696945192
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,32,1,4,63,0.022536000609397887
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,32,1,8,63,0.022412799298763275
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,32,1,16,63,0.02218399941921234
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,32,1,32,63,0.02206239998340607
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,32,1,64,63,0.022305600345134735
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,32,1,1,127,0.0176704004406929
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,32,1,2,127,0.01709440052509308
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,32,1,2,127,0.027822399139404298
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,32,1,4,127,0.01589920073747635
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,32,1,8,127,0.01565759927034378
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,32,1,16,127,0.01552480012178421
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,32,1,32,127,0.015475200116634369
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,32,1,64,127,0.015569600462913512
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,32,1,1,127,0.02852639853954315
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,32,1,4,127,0.026848000288009644
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,32,1,8,127,0.02674719989299774
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,32,1,16,127,0.026664000749588013
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,32,1,32,127,0.026769599318504332
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,32,1,64,127,0.02659359872341156
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,32,1,1,255,0.020664000511169435
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,32,1,2,255,0.020078399777412416
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,32,1,4,255,0.018883199989795686
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,32,1,8,255,0.01865600049495697
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,32,1,2,255,0.03676159977912903
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,32,1,16,255,0.018991999328136444
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,32,1,32,255,0.018824000656604768
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,32,1,64,255,0.01897599995136261
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,32,1,1,255,0.03720319867134094
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,32,1,4,255,0.03568480014801025
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,32,1,8,255,0.03559040129184723
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,32,1,16,255,0.03564000129699707
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,32,1,32,255,0.03571679890155792
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,32,1,64,255,0.03586399853229523
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,32,1,1,511,0.03585279881954193
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,32,1,2,511,0.027886399626731874
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,32,1,4,511,0.02292640060186386
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,32,1,8,511,0.02083359956741333
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,32,1,16,511,0.02088800072669983
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,32,1,32,511,0.021985599398612977
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,32,1,64,511,0.02197919934988022
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,32,1,1,511,0.06999520063400269
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,32,1,2,511,0.06233760118484497
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,32,1,4,511,0.05573440194129944
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,32,1,8,511,0.05113599896430969
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,32,1,16,511,0.05090720057487488
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,32,1,32,511,0.05154240131378174
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,32,1,64,511,0.05160800218582153
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,32,1,2,1023,0.03631199896335602
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,32,1,4,1023,0.02752479910850525
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,32,1,1,1023,0.04400320053100586
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,32,1,8,1023,0.023895999789237975
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,32,1,16,1023,0.023020799458026885
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,32,1,32,1023,0.023972800374031066
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,32,1,64,1023,0.023918400704860687
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,32,1,1,1023,0.10452480316162109
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,32,1,2,1023,0.09584640264511109
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,32,1,4,1023,0.09010400176048279
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,32,1,8,1023,0.08855680227279664
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,32,1,8,2047,0.03791840076446533
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,32,1,16,1023,0.08714879751205444
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,32,1,32,1023,0.08763039708137513
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,32,1,64,1023,0.08824639916419982
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,32,1,1,2047,0.058001601696014406
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,32,1,2,2047,0.047654399275779726
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,32,1,4,2047,0.04025120139122009
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,32,1,16,2047,0.03641600012779236
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,32,1,32,2047,0.03712959885597229
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,32,1,64,2047,0.03772799968719483
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,32,1,1,2047,0.17367199659347535
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,32,1,2,2047,0.16144319772720336
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,32,1,4,2047,0.15567679405212403
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,32,1,8,2047,0.15405280590057374
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,32,1,16,2047,0.15289599895477296
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,32,1,32,2047,0.1545024037361145
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,32,1,64,2047,0.15394079685211182
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,32,1,1,4095,0.08600159883499145
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,32,1,2,4095,0.06577119827270508
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,32,1,4,4095,0.058627200126647946
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,32,1,8,4095,0.05581120252609253
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,32,1,16,4095,0.05440639853477478
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,32,1,32,4095,0.05623520016670227
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,32,1,64,4095,0.056841599941253665
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,32,1,1,4095,0.31003360748291015
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,32,1,2,4095,0.28987200260162355
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,32,1,4,4095,0.28492639064788816
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,32,1,8,4095,0.2836992025375366
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,32,1,16,4095,0.2827744007110596
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,32,1,32,4095,0.2848959922790527
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,32,1,64,4095,0.28465440273284914
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,32,1,1,8191,0.1421679973602295
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,32,1,2,8191,0.10077279806137085
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,32,1,4,8191,0.09467840194702148
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,32,1,8,8191,0.09095039963722229
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,32,1,1,8191,0.5806960105895996
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,32,1,16,8191,0.0898256003856659
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,32,1,32,8191,0.09519839882850648
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,32,1,64,8191,0.09612159729003907
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,32,1,2,8191,0.5438511848449707
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,32,1,32,8191,0.5394144058227539
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,32,1,4,8191,0.5374671936035156
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,32,1,8,8191,0.5358463764190674
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,32,1,16,8191,0.5339695930480957
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,32,1,64,8191,0.541758394241333
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,32,1,1,16383,0.24991679191589355
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,32,1,2,16383,0.1705008029937744
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,32,1,4,16383,0.16336640119552612
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,32,1,8,16383,0.16008319854736328
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,32,1,16,16383,0.15781439542770387
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,32,1,32,16383,0.16168320178985596
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,32,1,64,16383,0.16352479457855223
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,32,1,1,16383,1.1192367553710938
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,32,1,2,16383,1.041862392425537
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,32,1,4,16383,1.0389743804931642
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,32,1,8,16383,1.0354496002197267
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,32,1,16,16383,1.0350208282470703
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,32,1,32,16383,1.039748764038086
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,32,1,1,32767,0.4737840175628662
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,32,1,64,16383,1.0413904190063477
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,32,1,2,32767,0.3052783966064453
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,32,1,4,32767,0.3005824089050293
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,32,1,8,32767,0.2939136028289795
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,32,1,16,32767,0.2922976016998291
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,32,1,32,32767,0.2967695951461792
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,32,1,64,32767,0.29706079959869386
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,32,1,1,32767,2.1929311752319336
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,32,1,2,32767,2.041048049926758
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,32,1,4,32767,2.037227249145508
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,32,1,8,32767,2.032894325256348
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,32,1,16,32767,2.0328927993774415
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,32,1,32,32767,2.0368127822875977
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,32,1,64,32767,2.0375392913818358
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,32,1,2,65535,0.5798096179962158
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,32,1,1,65535,0.9257871627807617
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,32,1,4,65535,0.5677711963653564
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,32,1,8,65535,0.5673920154571533
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,32,1,16,65535,0.5653984069824218
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,32,1,32,65535,0.5681439876556397
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,32,1,64,65535,0.5637728214263916
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,64,1,1,1,0.016524800658226015
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,64,1,2,1,0.015777599811553956
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,64,1,4,1,0.014404800534248353
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,64,1,8,1,0.014502400159835815
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,64,1,16,1,0.014081600308418273
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,64,1,32,1,0.014230400323867798
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,64,1,64,1,0.014150400459766389
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,64,1,1,1,0.022977599501609804
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,32,1,2,65535,4.039572906494141
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,32,1,4,65535,4.115838241577149
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,64,1,2,1,0.02173759937286377
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,32,1,8,65535,4.035817718505859
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,32,1,1,65535,4.341360092163086
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,64,1,4,1,0.020377600193023683
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,64,1,32,1,0.02009119987487793
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,32,1,16,65535,4.035841751098633
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,64,1,8,1,0.02040479928255081
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,64,1,16,1,0.02050720006227493
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,32,1,32,65535,4.038155364990234
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,32,1,64,65535,4.0373790740966795
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,64,1,64,1,0.02025119960308075
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,64,1,1,3,0.016939200460910797
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,64,1,2,3,0.015732799470424653
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,64,1,4,3,0.014641599357128143
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,64,1,8,3,0.014180800318717957
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,64,1,16,3,0.014203199744224548
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,64,1,32,3,0.01408960074186325
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,64,1,64,3,0.014286400377750396
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,64,1,1,3,0.02305919975042343
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,64,1,2,3,0.02147199958562851
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,64,1,4,3,0.020431999862194062
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,64,1,8,3,0.02057439982891083
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,64,1,16,3,0.02009280025959015
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,64,1,32,3,0.02037599980831146
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,64,1,64,3,0.019944000244140624
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,64,1,1,7,0.016896000504493712
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,64,1,2,7,0.015406399965286255
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,64,1,4,7,0.014499199390411378
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,64,1,8,7,0.014254400134086609
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,64,1,16,7,0.014248000085353851
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,64,1,32,7,0.014532800018787383
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,64,1,64,7,0.01398719996213913
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,64,1,1,7,0.023177599906921385
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,64,1,2,7,0.022126400470733644
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,64,1,4,7,0.02046400010585785
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,64,1,8,7,0.020259200036525725
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,64,1,16,7,0.020403200387954713
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,64,1,32,7,0.020470400154590607
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,64,1,64,7,0.020340800285339355
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,64,1,1,15,0.01672479957342148
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,64,1,2,15,0.015966400504112244
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,64,1,4,15,0.014742399752140044
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,64,1,8,15,0.014369599521160126
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,64,1,16,15,0.014257599413394929
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,64,1,32,15,0.014399999380111694
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,64,1,64,15,0.014251199364662171
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,64,1,1,15,0.023313599824905395
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,64,1,2,15,0.021939200162887574
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,64,1,4,15,0.02114560008049011
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,64,1,8,15,0.02112320065498352
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,64,1,16,15,0.02059520035982132
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,64,1,32,15,0.020948800444602966
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,64,1,64,15,0.020772799849510193
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,64,1,1,31,0.016704000532627106
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,64,1,2,31,0.01558080017566681
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,64,1,1,31,0.02540639936923981
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,64,1,4,31,0.014694400131702423
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,64,1,8,31,0.014504000544548035
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,64,1,16,31,0.01449120044708252
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,64,1,32,31,0.014417600631713868
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,64,1,64,31,0.014206400513648987
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,64,1,2,31,0.023919999599456787
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,64,1,4,31,0.022844800353050233
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,64,1,1,63,0.016771200299263
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,64,1,8,31,0.022886399924755097
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,64,1,16,31,0.022787199914455415
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,64,1,32,31,0.022812800109386445
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,64,1,64,31,0.022652800381183624
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,64,1,2,63,0.01701119989156723
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,64,1,4,63,0.015167999267578124
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,64,1,8,63,0.014841599762439728
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,64,1,16,63,0.014873600006103516
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,64,1,32,63,0.01512639969587326
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,64,1,64,63,0.014564800262451171
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,64,1,1,63,0.027987200021743774
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,64,1,2,63,0.027300798892974855
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,64,1,4,63,0.026128000020980834
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,64,1,8,63,0.025974398851394652
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,64,1,16,63,0.025860801339149475
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,64,1,32,63,0.026132801175117494
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,64,1,64,63,0.02589600086212158
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,64,1,1,127,0.018571199476718904
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,64,1,2,127,0.018059200048446654
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,64,1,4,127,0.016769599914550782
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,64,1,8,127,0.01666879951953888
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,64,1,16,127,0.016540800034999848
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,64,1,32,127,0.0165120005607605
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,64,1,64,127,0.016441600024700166
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,64,1,1,127,0.03701280057430267
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,64,1,2,127,0.034774398803710936
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,64,1,4,127,0.03375039994716644
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,64,1,8,127,0.03341279923915863
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,64,1,16,127,0.033580800890922545
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,64,1,32,127,0.03357920050621033
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,64,1,64,127,0.033318400382995605
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,64,1,1,255,0.02261119931936264
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,64,1,2,255,0.021796800196170807
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,64,1,4,255,0.020179200172424316
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,64,1,8,255,0.019518400728702544
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,64,1,16,255,0.019840000569820403
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,64,1,32,255,0.019867199659347533
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,64,1,64,255,0.019756799936294554
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,64,1,1,255,0.05650079846382141
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,64,1,2,255,0.05270400047302246
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,64,1,4,255,0.049425598978996274
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,64,1,8,255,0.04872959852218628
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,64,1,16,255,0.04915040135383606
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,64,1,32,255,0.04883840084075928
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,64,1,64,255,0.04831039905548096
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,64,1,1,511,0.03678880035877228
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,64,1,2,511,0.03975200057029724
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,64,1,4,511,0.031625598669052124
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,64,1,8,511,0.02529599964618683
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,64,1,16,511,0.023932799696922302
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,64,1,32,511,0.025
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,64,1,64,511,0.025993600487709045
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,64,1,1,511,0.0985040009021759
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,64,1,2,511,0.0998848021030426
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,64,1,4,511,0.09233919978141784
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,64,1,8,511,0.08942400217056275
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,64,1,16,511,0.08757920265197754
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,64,1,32,511,0.08922880291938781
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,64,1,64,511,0.08918719887733459
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,64,1,1,1023,0.05005599856376648
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,64,1,2,1023,0.05128480195999145
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,64,1,4,1023,0.042467200756073
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,64,1,8,1023,0.03884640038013458
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,64,1,16,1023,0.03731360137462616
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,64,1,32,1023,0.03819999992847443
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,64,1,64,1023,0.038889598846435544
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,64,1,1,1023,0.16732159852981568
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,64,1,64,1023,0.15531519651412964
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,64,1,2,1023,0.1651360034942627
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,64,1,4,1023,0.1582335948944092
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,64,1,8,1023,0.1546880006790161
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,64,1,8,2047,0.057447999715805054
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,64,1,16,1023,0.1536064028739929
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,64,1,32,1023,0.15489920377731323
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,64,1,1,2047,0.07698240280151367
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,64,1,2,2047,0.06968799829483033
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,64,1,4,2047,0.06073920130729675
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,64,1,16,2047,0.05543360114097595
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,64,1,32,2047,0.055848002433776855
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,64,1,64,2047,0.05733280181884766
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,64,1,1,2047,0.3021728038787842
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,64,1,2,2047,0.2938143968582153
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,64,1,4,2047,0.2874864101409912
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,64,1,8,2047,0.28445279598236084
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,64,1,16,2047,0.2829567909240723
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,64,1,32,2047,0.28411839008331297
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,64,1,64,2047,0.2849695920944214
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,64,1,1,4095,0.13259199857711793
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,64,1,2,4095,0.10679359436035156
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,64,1,4,4095,0.09722239971160888
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,64,1,8,4095,0.09231359958648681
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,64,1,16,4095,0.08993600010871887
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,64,1,1,4095,0.5740719795227051
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,64,1,32,4095,0.09283360242843627
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,64,1,64,4095,0.0957152009010315
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,64,1,2,4095,0.5465871810913085
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,64,1,4,4095,0.5392655849456787
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,64,1,8,4095,0.5377679824829101
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,64,1,16,4095,0.5358672142028809
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,64,1,32,4095,0.5375055789947509
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,64,1,1,8191,0.24282240867614746
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,64,1,64,4095,0.5394864082336426
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,64,1,2,8191,0.1760527968406677
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,64,1,4,8191,0.16552640199661256
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,64,1,8,8191,0.1599679946899414
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,64,1,16,8191,0.15777120590209961
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,64,1,32,8191,0.16365439891815187
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,64,1,64,8191,0.16865919828414916
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,64,1,1,8191,1.107870388031006
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,64,1,2,8191,1.0502623558044433
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,64,1,4,8191,1.0401087760925294
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,64,1,8,8191,1.0363007545471192
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,64,1,16,8191,1.0341839790344238
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,64,1,32,8191,1.0423888206481933
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,64,1,2,16383,0.314847993850708
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,64,1,1,16383,0.4571104049682617
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,64,1,64,8191,1.0452336311340331
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,64,1,4,16383,0.3000976085662842
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,64,1,8,16383,0.296724796295166
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,64,1,16,16383,0.2921663999557495
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,64,1,32,16383,0.29846720695495604
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,64,1,64,16383,0.3055824041366577
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,64,1,2,16383,2.0469968795776365
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,64,1,1,16383,2.1796016693115234
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,64,1,4,16383,2.0379312515258787
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,64,1,8,16383,2.035508728027344
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,64,1,16,16383,2.031977653503418
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,64,1,32,16383,2.0397504806518554
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,64,1,64,16383,2.0449071884155274
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,64,1,1,32767,0.9192496299743652
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,64,1,2,32767,0.583571195602417
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,64,1,4,32767,0.5681104183197021
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,64,1,8,32767,0.563105583190918
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,64,1,32,32767,0.5657279968261719
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,64,1,16,32767,0.5627823829650879
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,64,1,64,32767,0.568235206604004
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,128,1,1,1,0.025705599784851076
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,128,1,2,1,0.017590400576591492
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,128,1,4,1,0.016187199950218202
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,128,1,8,1,0.016164800524711607
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,128,1,16,1,0.01586720049381256
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,128,1,32,1,0.01607999950647354
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,128,1,64,1,0.01586720049381256
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,128,1,1,1,0.032153600454330446
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,64,1,2,32767,4.076054382324219
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,64,1,1,32767,4.3467857360839846
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,64,1,4,32767,4.039116668701172
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,128,1,4,1,0.022393600642681123
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,128,1,2,1,0.02364159971475601
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,64,1,8,32767,4.034076690673828
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,64,1,32,32767,4.0389457702636715
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,64,1,16,32767,4.035865783691406
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,128,1,8,1,0.022339199483394623
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,64,1,64,32767,4.042063903808594
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,128,1,32,1,0.022441600263118745
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,128,1,16,1,0.021905599534511565
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,128,1,64,1,0.02210240066051483
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,128,1,1,3,0.025969600677490233
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,128,1,2,3,0.017523199319839478
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,128,1,8,3,0.01592160016298294
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,128,1,4,3,0.016209599375724793
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,128,1,16,3,0.016131199896335602
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,128,1,32,3,0.01603199988603592
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,128,1,64,3,0.01615999937057495
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,128,1,1,3,0.03238880038261414
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,128,1,2,3,0.023790399730205535
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,128,1,4,3,0.02221119999885559
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,128,1,8,3,0.02250239998102188
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,128,1,16,3,0.022294400632381438
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,128,1,32,3,0.02226720005273819
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,128,1,64,3,0.02258400022983551
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,128,1,1,7,0.025337600708007814
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,128,1,2,7,0.0177279993891716
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,128,1,4,7,0.016182400286197662
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,128,1,8,7,0.01581919938325882
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,128,1,16,7,0.01563359946012497
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,128,1,32,7,0.015828800201416016
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,128,1,64,7,0.015996800363063814
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,128,1,1,7,0.03310079872608185
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,128,1,2,7,0.023814399540424348
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,128,1,4,7,0.022969600558280946
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,128,1,8,7,0.022833600640296936
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,128,1,16,7,0.022316800057888032
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,128,1,32,7,0.022364799678325654
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,128,1,64,7,0.02248319983482361
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,128,1,1,15,0.025939199328422546
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,128,1,2,15,0.017532800137996674
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,128,1,4,15,0.016564799845218657
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,128,1,8,15,0.016326400637626647
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,128,1,16,15,0.01597920060157776
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,128,1,32,15,0.01584320068359375
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,128,1,64,15,0.01621920019388199
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,128,1,1,15,0.03624320030212402
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,128,1,2,15,0.025974398851394652
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,128,1,4,15,0.024868799746036528
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,128,1,8,15,0.024907200038433074
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,128,1,16,15,0.0247871994972229
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,128,1,32,15,0.024489599466323852
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,128,1,64,15,0.024695999920368195
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,128,1,1,31,0.02802720069885254
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,128,1,2,31,0.01794240027666092
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,128,1,4,31,0.016489599645137788
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,128,1,8,31,0.015999999642372132
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,128,1,16,31,0.0160848006606102
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,128,1,32,31,0.01595200002193451
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,128,1,64,31,0.016344000399112702
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,128,1,1,31,0.04130240082740784
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,128,1,2,31,0.028939199447631837
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,128,1,4,31,0.02778880000114441
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,128,1,8,31,0.02717599868774414
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,128,1,16,31,0.027555200457572936
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,128,1,32,31,0.02723039984703064
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,128,1,64,31,0.02736159861087799
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,128,1,1,63,0.028675198554992676
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,128,1,2,63,0.01808159947395325
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,128,1,4,63,0.017084799706935883
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,128,1,8,63,0.016641600430011748
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,128,1,16,63,0.0168272003531456
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,128,1,32,63,0.0164015993475914
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,128,1,64,63,0.016683200001716615
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,128,1,1,63,0.04813120067119599
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,128,1,2,63,0.035918399691581726
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,128,1,4,63,0.03431519865989685
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,128,1,8,63,0.03333280086517334
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,128,1,16,63,0.033748799562454225
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,128,1,32,63,0.03328959941864014
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,128,1,64,63,0.03344640135765076
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,128,1,1,127,0.03607519865036011
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,128,1,2,127,0.020955200493335723
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,128,1,4,127,0.019782400131225585
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,128,1,8,127,0.018408000469207764
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,128,1,16,127,0.019047999382019044
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,128,1,32,127,0.01836480051279068
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,128,1,64,127,0.01854719966650009
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,128,1,32,127,0.04812160134315491
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,128,1,1,127,0.06776319742202759
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,128,1,2,127,0.05454559922218323
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,128,1,4,127,0.050491201877594
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,128,1,8,127,0.04819679856300354
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,128,1,16,127,0.048132801055908205
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,128,1,64,127,0.04782719910144806
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,128,1,1,255,0.04208320081233978
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,128,1,64,255,0.02144480049610138
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,128,1,2,255,0.026759999990463256
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,128,1,4,255,0.024636800587177276
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,128,1,8,255,0.022401599586009978
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,128,1,16,255,0.021617600321769716
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,128,1,32,255,0.021460799872875212
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,128,1,1,255,0.10256479978561402
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,128,1,2,255,0.08824639916419982
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,128,1,1,511,0.06377599835395813
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,128,1,2,511,0.04344159960746765
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,128,1,4,255,0.08601279854774475
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,128,1,8,255,0.08411359786987305
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,128,1,16,255,0.08376479744911194
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,128,1,32,255,0.08399999737739564
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,128,1,64,255,0.08451039791107177
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,128,1,8,511,0.036164799332618715
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,128,1,4,511,0.03930880129337311
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,128,1,16,511,0.03653599917888641
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,128,1,32,511,0.037049600481987
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,128,1,64,511,0.036392000317573545
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,128,1,1,511,0.1812608003616333
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,128,1,2,511,0.16017919778823853
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,128,1,4,511,0.1555248022079468
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,128,1,8,511,0.15394560098648072
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,128,1,16,511,0.15307040214538575
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,128,1,32,511,0.15294079780578612
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,128,1,64,511,0.15333919525146483
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,128,1,1,1023,0.0916208028793335
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,128,1,2,1023,0.06143519878387451
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,128,1,4,1023,0.05729759931564331
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,128,1,8,1023,0.055473601818084715
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,128,1,16,1023,0.05460960268974304
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,128,1,32,1023,0.05391839742660522
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,128,1,64,1023,0.05422239899635315
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,128,1,1,1023,0.31542720794677737
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,128,1,2,1023,0.2883023977279663
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,128,1,4,1023,0.2857808113098145
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,128,1,8,1023,0.28353760242462156
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,128,1,4,2047,0.09242720007896424
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,128,1,16,1023,0.282859206199646
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,128,1,32,1023,0.28179519176483153
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,128,1,64,1023,0.28260159492492676
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,128,1,1,2047,0.14561280012130737
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,128,1,2,2047,0.0978767991065979
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,128,1,8,2047,0.09059839844703674
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,128,1,16,2047,0.09028480052947999
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,128,1,32,2047,0.08863999843597412
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,128,1,64,2047,0.09001759886741638
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,128,1,1,2047,0.58439040184021
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,128,1,2,2047,0.5398831844329834
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,128,1,4,2047,0.537499189376831
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,128,1,8,2047,0.5355760097503662
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,128,1,16,2047,0.5348127841949463
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,128,1,32,2047,0.5353504180908203
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,128,1,64,2047,0.534662389755249
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,128,1,1,4095,0.2565743923187256
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,128,1,2,4095,0.1661344051361084
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,128,1,4,4095,0.1607391953468323
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,128,1,8,4095,0.1588703989982605
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,128,1,16,4095,0.15776959657669068
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,128,1,32,4095,0.1576799988746643
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,128,1,64,4095,0.1554640054702759
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,128,1,1,4095,1.123788833618164
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,128,1,2,4095,1.0408432006835937
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,128,1,4,4095,1.0371487617492676
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,128,1,8,4095,1.0354432106018066
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,128,1,16,4095,1.0357263565063477
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,128,1,32,4095,1.035910415649414
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,128,1,64,4095,1.035086441040039
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,128,1,1,8191,0.4794896125793457
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,128,1,2,8191,0.31624000072479247
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,128,1,4,8191,0.29576959609985354
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,128,1,8,8191,0.29293599128723147
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,128,1,16,8191,0.29315359592437745
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,128,1,32,8191,0.2922976016998291
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,128,1,64,8191,0.2899456024169922
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,128,1,1,8191,2.1956096649169923
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,128,1,2,8191,2.036966323852539
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,128,1,4,8191,2.0350271224975587
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,128,1,8,8191,2.031987190246582
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,128,1,16,8191,2.0329120635986326
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,128,1,32,8191,2.0318016052246093
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,128,1,64,8191,2.033033561706543
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,128,1,2,16383,0.6002016067504883
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,128,1,1,16383,0.9301600456237793
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,128,1,4,16383,0.5684351921081543
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,128,1,8,16383,0.5606143951416016
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,128,1,16,16383,0.5600543975830078
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,128,1,32,16383,0.5647151947021485
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,128,1,64,16383,0.5645040035247803
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,256,1,1,1,0.049667200446128844
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,256,1,2,1,0.026609599590301514
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,256,1,4,1,0.02399519979953766
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,256,1,8,1,0.023263999819755556
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,256,1,16,1,0.02316640019416809
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,256,1,32,1,0.022833600640296936
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,256,1,64,1,0.02274720072746277
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,128,1,1,16383,4.346236801147461
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,128,1,2,16383,4.037859344482422
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,256,1,1,1,0.05629119873046875
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,256,1,2,1,0.03612479865550995
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,128,1,4,16383,4.072246551513672
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,128,1,8,16383,4.0313678741455075
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,128,1,16,16383,4.030564880371093
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,256,1,4,1,0.03012160062789917
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,128,1,32,16383,4.032438278198242
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,256,1,8,1,0.029865598678588866
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,128,1,64,16383,4.033063888549805
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,256,1,16,1,0.029388800263404846
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,256,1,32,1,0.029232001304626463
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,256,1,64,1,0.02895359992980957
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,256,1,1,3,0.04930399954319
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,256,1,2,3,0.02560960054397583
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,256,1,4,3,0.024240000545978545
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,256,1,8,3,0.02319519966840744
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,256,1,16,3,0.022852799296379088
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,256,1,32,3,0.0226623997092247
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,256,1,64,3,0.02326720058917999
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,256,1,1,3,0.056764799356460574
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,256,1,2,3,0.033790400624275206
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,256,1,4,3,0.030737599730491637
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,256,1,8,3,0.030055999755859375
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,256,1,16,3,0.02985759973526001
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,256,1,32,3,0.02985120117664337
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,256,1,64,3,0.029542401432991028
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,256,1,1,7,0.048876801133155824
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,256,1,2,7,0.026609599590301514
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,256,1,4,7,0.02375199943780899
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,256,1,8,7,0.023255999386310577
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,256,1,16,7,0.022681599855422972
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,256,1,32,7,0.022993600368499754
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,256,1,64,7,0.023048000037670137
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,256,1,1,7,0.0591759979724884
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,256,1,2,7,0.0360368013381958
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,256,1,4,7,0.03250400125980377
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,256,1,8,7,0.032180801033973694
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,256,1,16,7,0.031841599941253663
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,256,1,32,7,0.03147839903831482
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,256,1,64,7,0.031409600377082826
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,256,1,1,15,0.04863199889659882
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,256,1,2,15,0.026060798764228822
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,256,1,4,15,0.024527999758720397
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,256,1,8,15,0.023651200532913207
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,256,1,16,15,0.022752000391483305
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,256,1,32,15,0.023425599932670592
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,256,1,64,15,0.02287999987602234
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,256,1,1,15,0.06228320002555847
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,256,1,2,15,0.040524798631668094
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,256,1,4,15,0.03550240099430084
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,256,1,8,15,0.03478240072727203
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,256,1,16,15,0.03410719931125641
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,256,1,32,15,0.034164801239967346
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,256,1,64,15,0.03430080115795135
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,256,1,32,31,0.02287199944257736
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,256,1,1,31,0.05004799962043762
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,256,1,2,31,0.027166399359703063
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,256,1,4,31,0.02418559938669205
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,256,1,8,31,0.02354719936847687
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,256,1,16,31,0.02340639978647232
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,256,1,64,31,0.022856000065803527
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,256,1,1,31,0.06923040151596069
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,256,1,2,31,0.04933759868144989
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,256,1,4,31,0.04280639886856079
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,256,1,8,31,0.040591999888420105
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,256,1,16,31,0.040249601006507874
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,256,1,32,31,0.0402864009141922
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,256,1,64,31,0.03989759981632233
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,256,1,1,63,0.05297279953956604
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,256,1,2,63,0.03430399894714355
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,256,1,4,63,0.02544800043106079
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,256,1,8,63,0.02398560047149658
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,256,1,16,63,0.023603199422359465
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,256,1,32,63,0.023622399568557738
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,256,1,64,63,0.023479999601840974
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,256,1,1,63,0.0853551983833313
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,256,1,2,63,0.06592320203781128
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,256,1,4,63,0.05926560163497925
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,256,1,8,63,0.05631840229034424
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,256,1,16,63,0.05333439707756042
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,256,1,32,63,0.05298720002174377
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,256,1,8,127,0.030151998996734618
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,256,1,64,63,0.05322239995002746
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,256,1,1,127,0.06023200154304505
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,256,1,2,127,0.03892639875411987
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,256,1,4,127,0.03452000021934509
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,256,1,16,127,0.027856001257896425
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,256,1,32,127,0.028097599744796753
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,256,1,64,127,0.027291199564933775
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,256,1,1,127,0.12138880491256714
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,256,1,2,127,0.09938560128211975
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,256,1,1,255,0.07361279726028443
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,256,1,4,127,0.09555839896202087
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,256,1,8,127,0.09260799884796142
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,256,1,16,127,0.09146080017089844
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,256,1,32,127,0.09095680117607116
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,256,1,64,127,0.0906544029712677
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,256,1,2,255,0.0484688013792038
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,256,1,1,255,0.18969600200653075
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,256,1,4,255,0.04333280026912689
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,256,1,8,255,0.04101920127868652
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,256,1,16,255,0.04079520106315613
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,256,1,32,255,0.0403903990983963
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,256,1,64,255,0.040031999349594116
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,256,1,2,255,0.16401599645614623
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,256,1,4,255,0.16028800010681152
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,256,1,8,255,0.15725120306015014
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,256,1,16,255,0.15771839618682862
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,256,1,32,255,0.15693919658660888
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,256,1,64,255,0.15655839443206787
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,256,1,1,511,0.11526559591293335
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,256,1,2,511,0.07580000162124634
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,256,1,4,511,0.06779999732971191
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,256,1,8,511,0.06433600187301636
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,256,1,16,511,0.06189119815826416
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,256,1,32,511,0.06123200058937073
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,256,1,64,511,0.06133919954299927
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,256,1,1,511,0.34002079963684084
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,256,1,2,511,0.30230560302734377
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,256,1,4,511,0.296396803855896
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,256,1,8,511,0.2923903942108154
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,256,1,16,511,0.2908560037612915
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,256,1,32,511,0.29051039218902586
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,256,1,64,511,0.28954238891601564
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,256,1,1,1023,0.1698032021522522
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,256,1,2,1023,0.11175999641418458
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,256,1,4,1023,0.10225440263748169
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,256,1,8,1023,0.09898080229759217
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,256,1,16,1023,0.09620320200920104
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,256,1,32,1023,0.09539200067520141
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,256,1,64,1023,0.09545760154724121
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,256,1,1,1023,0.6100671768188477
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,256,1,2,1023,0.5543583869934082
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,256,1,4,1023,0.5475359916687011
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,256,1,8,1023,0.5438144207000732
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,256,1,16,1023,0.5413136005401611
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,256,1,32,1023,0.5411680221557618
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,256,1,64,1023,0.5408463954925538
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,256,1,1,2047,0.2761375904083252
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,256,1,2,2047,0.1792304039001465
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,256,1,4,2047,0.17003040313720702
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,256,1,8,2047,0.1660207986831665
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,256,1,16,2047,0.16233919858932494
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,256,1,32,2047,0.1629024028778076
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,256,1,64,2047,0.16225119829177856
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,256,1,1,2047,1.1465359687805177
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,256,1,2,2047,1.054742431640625
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,256,1,4,2047,1.0459327697753906
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,256,1,16,2047,1.041766357421875
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,256,1,8,2047,1.0426719665527344
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,256,1,32,2047,1.0407024383544923
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,256,1,64,2047,1.03995361328125
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,256,1,2,4095,0.32928640842437745
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,256,1,4,4095,0.3087935924530029
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,256,1,1,4095,0.5056992053985596
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,256,1,8,4095,0.3012480020523071
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,256,1,16,4095,0.29720640182495117
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,256,1,32,4095,0.29718880653381347
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,256,1,64,4095,0.2959568023681641
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,256,1,1,4095,2.2287248611450194
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,256,1,2,4095,2.0494943618774415
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,256,1,4,4095,2.0421600341796875
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,256,1,8,4095,2.0408960342407227
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,256,1,16,4095,2.041230392456055
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,256,1,32,4095,2.0375392913818358
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,256,1,64,4095,2.038827133178711
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,256,1,2,8191,0.6351136207580567
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,256,1,1,8191,0.9749247550964355
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,256,1,4,8191,0.5936304092407226
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,256,1,8,8191,0.5681647777557373
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,256,1,16,8191,0.5636496067047119
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,256,1,32,8191,0.570033597946167
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,256,1,64,8191,0.5616479873657226
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,256,1,2,8191,4.050582504272461
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,256,1,1,8191,4.381459045410156
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,256,1,4,8191,4.056174468994141
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,256,1,8,8191,4.034719848632813
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,256,1,16,8191,4.034790420532227
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,256,1,32,8191,4.033196640014649
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,256,1,64,8191,4.030503845214843
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,512,1,1,1,0.08700479865074158
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,512,1,2,1,0.05104799866676331
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,512,1,4,1,0.042044800519943235
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,512,1,32,1,0.037483200430870056
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,512,1,16,1,0.0379584014415741
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,512,1,8,1,0.03891200125217438
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,512,1,64,1,0.03750079870223999
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,512,1,2,1,0.05879520177841187
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,512,1,1,1,0.09694079756736755
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,512,1,4,1,0.04872959852218628
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,512,1,8,1,0.0455808013677597
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,512,1,16,1,0.044924798607826236
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,512,1,32,1,0.044268798828125
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,512,1,64,1,0.0448063999414444
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,512,1,1,3,0.08624799847602845
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,512,1,2,3,0.050291198492050174
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,512,1,4,3,0.040214401483535764
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,512,1,8,3,0.038622400164604186
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,512,1,16,3,0.037785598635673524
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,512,1,32,3,0.03770079910755157
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,512,1,64,3,0.03715679943561554
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,512,1,1,3,0.09699199795722961
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,512,1,2,3,0.06100640296936035
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,512,1,4,3,0.05112159848213196
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,512,1,8,3,0.047977599501609805
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,512,1,16,3,0.04654400050640106
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,512,1,32,3,0.046342399716377256
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,512,1,64,3,0.0460752010345459
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,512,1,1,7,0.08702560067176819
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,512,1,2,7,0.05061759948730469
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,512,1,4,7,0.04032320082187653
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,512,1,8,7,0.03871200084686279
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,512,1,16,7,0.03776639997959137
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,512,1,32,7,0.03758080005645752
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,512,1,64,7,0.03756000101566315
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,512,1,1,7,0.10049439668655395
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,512,1,2,7,0.06301599740982056
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,512,1,4,7,0.05416319966316223
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,512,1,8,7,0.05018079876899719
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,512,1,16,7,0.04952000081539154
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,512,1,32,7,0.0489872008562088
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,512,1,64,7,0.0487744003534317
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,512,1,1,15,0.087636798620224
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,512,1,2,15,0.05137280225753784
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,512,1,4,15,0.040803200006484984
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,512,1,8,15,0.03887040019035339
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,512,1,16,15,0.03814240097999573
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,512,1,32,15,0.037827199697494505
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,512,1,64,15,0.03774240016937256
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,512,1,1,15,0.10708479881286621
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,512,1,2,15,0.06958879828453064
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,512,1,4,15,0.06282879710197449
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,512,1,8,15,0.05714240074157715
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,512,1,16,15,0.05511999726295471
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,512,1,32,15,0.05487359762191772
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,512,1,64,15,0.054771202802658084
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,512,1,1,31,0.09117760062217713
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,512,1,2,31,0.052376002073287964
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,512,1,4,31,0.043049600720405576
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,512,1,8,31,0.03918719887733459
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,512,1,16,31,0.03782239854335785
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,512,1,32,31,0.03789120018482208
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,512,1,64,31,0.03755840063095093
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,512,1,1,31,0.12448320388793946
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,512,1,2,31,0.08625919818878174
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,512,1,4,31,0.07740640044212341
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,512,1,8,31,0.0731823980808258
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,512,1,16,31,0.06988639831542968
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,512,1,32,31,0.067467200756073
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,512,1,64,31,0.06742720007896423
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,512,1,1,63,0.09265120029449463
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,512,1,2,63,0.057143998146057126
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,512,1,4,63,0.04854080080986023
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,512,1,8,63,0.04466400146484375
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,512,1,16,63,0.041868799924850465
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,512,1,32,63,0.04014399945735932
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,512,1,64,63,0.03954559862613678
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,512,1,1,63,0.15443040132522584
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,512,1,2,63,0.11815199851989747
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,512,1,4,63,0.11087520122528076
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,512,1,8,63,0.10706880092620849
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,512,1,16,63,0.10368000268936158
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,512,1,32,63,0.1029520034790039
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,512,1,64,63,0.1015663981437683
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,512,1,1,127,0.1057695984840393
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,512,1,2,127,0.06699519753456115
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,512,1,4,127,0.05856000185012818
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,512,1,8,127,0.05484799742698669
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,512,1,16,127,0.05283520221710205
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,512,1,4,127,0.17524000406265258
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,512,1,32,127,0.052051198482513425
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,512,1,64,127,0.050843197107315066
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,512,1,1,127,0.22168319225311278
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,512,1,2,127,0.1828320026397705
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,512,1,8,127,0.17155840396881103
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,512,1,16,127,0.16889280080795288
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,512,1,32,127,0.1679263949394226
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,512,1,64,127,0.167193603515625
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,512,1,1,255,0.13124959468841552
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,512,1,2,255,0.0845296025276184
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,512,1,4,255,0.07598879933357239
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,512,1,8,255,0.07271360158920288
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,512,1,16,255,0.07030400037765502
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,512,1,32,255,0.06985599994659424
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,512,1,4,255,0.3015759944915771
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,512,1,64,255,0.06899359822273254
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,512,1,1,255,0.35485761165618895
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,512,1,2,255,0.30869920253753663
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,512,1,8,255,0.2981775999069214
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,512,1,16,255,0.296398401260376
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,512,1,32,255,0.2956736087799072
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,512,1,64,255,0.2943504095077515
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,512,1,1,511,0.21846239566802977
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,512,1,2,511,0.14018880128860473
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,512,1,4,511,0.1241760015487671
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,512,1,8,511,0.11612479686737061
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,512,1,16,511,0.11154719591140747
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,512,1,32,511,0.11099519729614257
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,512,1,64,511,0.10903680324554443
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,512,1,4,511,0.568233585357666
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,512,1,2,511,0.583676815032959
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,512,1,8,511,0.5601439952850342
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,512,1,1,511,0.6571055889129639
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,512,1,4,1023,0.19240959882736205
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,512,1,16,511,0.5556047916412353
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,512,1,32,511,0.5545407772064209
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,512,1,64,511,0.5546031951904297
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,512,1,2,1023,0.20849120616912842
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,512,1,1,1023,0.32366559505462644
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,512,1,8,1023,0.18385599851608275
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,512,1,16,1023,0.17921600341796876
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,512,1,32,1023,0.17795200347900392
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,512,1,64,1023,0.1767840027809143
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,512,1,1,1023,1.1928671836853026
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,512,1,2,1023,1.0855968475341797
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,512,1,4,1023,1.0686623573303222
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,512,1,8,1023,1.0608912467956544
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,512,1,16,1023,1.0568223953247071
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,512,1,32,1023,1.055675220489502
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,512,1,64,1023,1.0538031578063964
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,512,1,2,2047,0.3591936111450195
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,512,1,1,2047,0.5468031883239746
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,512,1,4,2047,0.32617599964141847
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,512,1,8,2047,0.3173504114151001
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,512,1,32,2047,0.3103679895401001
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,512,1,16,2047,0.312662410736084
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,512,1,64,2047,0.30969440937042236
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,512,1,1,2047,2.2719520568847655
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,512,1,1,4095,1.0263520240783692
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,512,1,2,2047,2.0786815643310548
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,512,1,32,2047,2.0509376525878906
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,512,1,16,2047,2.053009605407715
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,512,1,4,2047,2.065847969055176
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,512,1,8,2047,2.0587024688720703
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,512,1,64,2047,2.0490175247192384
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,512,1,2,4095,0.6595407962799072
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,512,1,4,4095,0.6323647975921631
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,512,1,8,4095,0.5997471809387207
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,512,1,16,4095,0.5796624183654785
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,512,1,32,4095,0.5786943912506104
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,512,1,64,4095,0.5771696090698242
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,1024,1,1,1,0.15598080158233643
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1024,1,2,1,0.086353600025177
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1024,1,4,1,0.07237600088119507
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1024,1,8,1,0.06497600078582763
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1024,1,16,1,0.06187360286712647
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1024,1,32,1,0.061552000045776364
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1024,1,64,1,0.06152480244636536
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,512,1,2,4095,4.078377532958984
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,512,1,1,4095,4.4438926696777346
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,512,1,4,4095,4.0654865264892575
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,512,1,8,4095,4.108003234863281
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1024,1,2,1,0.09783040285110474
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,512,1,16,4095,4.048515319824219
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,1024,1,1,1,0.16363999843597413
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,512,1,32,4095,4.046083068847656
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,512,1,64,4095,4.046030426025391
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1024,1,4,1,0.08496479988098145
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1024,1,8,1,0.07681279778480529
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1024,1,16,1,0.07229920029640198
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1024,1,32,1,0.07040320038795471
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1024,1,64,1,0.0706928014755249
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,1024,1,1,3,0.1555008053779602
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1024,1,2,3,0.08838559985160828
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1024,1,4,3,0.07271519899368287
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1024,1,8,3,0.0650816023349762
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1024,1,16,3,0.06252480149269105
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1024,1,32,3,0.06165599822998047
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1024,1,64,3,0.061627197265625
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,1024,1,1,3,0.16713279485702515
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1024,1,2,3,0.10150719881057739
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1024,1,4,3,0.08704479932785034
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1024,1,8,3,0.07986080050468444
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1024,1,16,3,0.07462080121040345
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1024,1,32,3,0.07341439723968506
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1024,1,64,3,0.07332800030708313
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,1024,1,1,7,0.1538272023200989
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1024,1,2,7,0.08808640241622925
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1024,1,4,7,0.07321919798851013
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1024,1,8,7,0.06499519944190979
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1024,1,16,7,0.06262080073356628
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1024,1,32,7,0.061699199676513675
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1024,1,64,7,0.06161919832229614
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,1024,1,1,7,0.17424639463424682
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1024,1,2,7,0.10923360586166382
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1024,1,4,7,0.09421600103378296
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1024,1,8,7,0.0880895972251892
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1024,1,16,7,0.08056960105895997
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1024,1,32,7,0.0802287995815277
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1024,1,64,7,0.07909920215606689
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,1024,1,1,15,0.15808479785919188
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1024,1,4,15,0.07333279848098755
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1024,1,2,15,0.09120640158653259
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1024,1,8,15,0.06677600145339965
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1024,1,16,15,0.06303359866142273
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1024,1,32,15,0.06188480257987976
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1024,1,64,15,0.06144160032272339
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,1024,1,1,15,0.19206880331039428
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1024,1,2,15,0.12503520250320435
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1024,1,4,15,0.10833760499954223
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1024,1,8,15,0.10206079483032227
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1024,1,16,15,0.097980797290802
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1024,1,32,15,0.0940720021724701
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1024,1,64,15,0.09302240014076232
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,1024,1,1,31,0.1590831995010376
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1024,1,2,31,0.09128640294075012
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1024,1,4,31,0.07497280240058898
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1024,1,8,31,0.06892960071563721
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1024,1,16,31,0.06392800211906433
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1024,1,32,31,0.06263359785079955
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1024,1,64,31,0.06217280030250549
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,1024,1,1,31,0.22172000408172607
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1024,1,2,31,0.1548416018486023
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1024,1,4,31,0.1384703993797302
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1024,1,8,31,0.1308240056037903
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1024,1,16,31,0.1272976040840149
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1024,1,32,31,0.12625919580459594
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1024,1,64,31,0.1264912009239197
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,1024,1,1,63,0.16257920265197753
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1024,1,2,63,0.09994239807128906
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1024,1,4,63,0.08111199736595154
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1024,1,8,63,0.07426239848136902
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1024,1,16,63,0.07157760262489318
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1024,1,32,63,0.07069439888000488
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1024,1,64,63,0.06966080069541931
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,1024,1,1,63,0.2791615962982178
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1024,1,2,63,0.21653120517730712
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1024,1,4,63,0.19774719476699829
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1024,1,8,63,0.19132319688796998
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1024,1,16,63,0.18845280408859252
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1024,1,32,63,0.1858736038208008
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1024,1,64,63,0.18596320152282714
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,1024,1,1,127,0.18761600255966188
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1024,1,2,127,0.11810879707336426
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1024,1,4,127,0.0998528003692627
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1024,1,8,127,0.09217439889907837
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1024,1,16,127,0.08771839737892151
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1024,1,32,127,0.08596320152282715
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1024,1,64,127,0.08517280220985413
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,1024,1,1,127,0.4116991996765137
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1024,1,2,127,0.34244959354400634
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1024,1,4,127,0.3255568027496338
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1024,1,8,127,0.31756160259246824
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1024,1,16,127,0.3127295970916748
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1024,1,32,127,0.30969440937042236
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1024,1,64,127,0.30947198867797854
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,1024,1,1,255,0.23889119625091554
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1024,1,2,255,0.14945600032806397
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1024,1,4,255,0.13357280492782592
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1024,1,8,255,0.12620160579681397
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1024,1,16,255,0.12416000366210937
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1024,1,32,255,0.1205631971359253
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1024,1,64,255,0.11936160326004028
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,1024,1,1,255,0.6797232151031494
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1024,1,2,255,0.5896624088287353
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1024,1,4,255,0.5755919933319091
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1024,1,8,255,0.5689888000488281
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1024,1,16,255,0.566539192199707
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1024,1,32,255,0.5645823955535889
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1024,1,4,511,0.2245743989944458
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1024,1,64,255,0.5628767967224121
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1024,1,64,511,0.19832160472869872
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1024,1,32,511,0.19976480007171632
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,1024,1,1,511,0.4014080047607422
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1024,1,2,511,0.25604639053344724
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1024,1,2,511,1.129804801940918
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1024,1,8,511,0.20887999534606932
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1024,1,16,511,0.2021631956100464
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,1024,1,1,511,1.2737024307250977
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1024,1,4,511,1.0997535705566406
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1024,1,8,511,1.0850768089294434
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1024,1,16,511,1.0781776428222656
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1024,1,32,511,1.0743200302124023
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1024,1,64,511,1.0746992111206055
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,1024,1,1,1023,0.6203055858612061
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1024,1,2,1023,0.4024320125579834
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1024,1,4,1023,0.3610064029693604
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1024,1,8,1023,0.34507200717926023
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1024,1,16,1023,0.3363919973373413
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1024,1,32,1023,0.3334496021270752
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1024,1,64,1023,0.3312432050704956
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,1024,1,1,1023,2.3520944595336912
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1024,1,2,1023,2.1313648223876953
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1024,1,8,1023,2.0862335205078124
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1024,1,4,1023,2.1026704788208006
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1024,1,16,1023,2.0763423919677733
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1024,1,32,1023,2.0736272811889647
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,128,1024,1,1,2047,1.0896656036376953
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1024,1,64,1023,2.071667289733887
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,64,1024,1,2,2047,0.7232160091400146
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,32,1024,1,4,2047,0.6543871879577636
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,16,1024,1,8,2047,0.6139440059661865
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,8,1024,1,16,2047,0.6021103858947754
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,4,1024,1,32,2047,0.6001008033752442
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,float16,2,1024,1,64,2047,0.5968480110168457
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,64,1024,1,2,2047,4.125356674194336
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,16,1024,1,8,2047,4.102379226684571
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,128,1024,1,1,2047,4.555775833129883
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,32,1024,1,4,2047,4.103758239746094
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,8,1024,1,16,2047,4.070278549194336
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,4,1024,1,32,2047,4.066811370849609
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flash_attention,float16,fp8,2,1024,1,64,2047,4.069169616699218
