framework,version,device,op_name,kernel_source,batch_size,isl,num_heads,num_key_value_heads,head_dim,beam_width,attn_dtype,kv_cache_dtype,step,latency
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,96,1,128,1,float16,float16,0,92.14114379882812
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,96,1,128,1,float16,fp8,0,76.75782470703125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,96,1,128,1,fp8,fp8,0,77.73604125976563
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,96,4,128,1,float16,fp8,0,77.66591186523438
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,96,2,128,1,float16,fp8,0,78.036181640625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,96,2,128,1,fp8,fp8,0,77.67650146484375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,96,2,128,1,float16,float16,0,91.70912475585938
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,96,4,128,1,float16,float16,0,93.64130249023438
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,96,4,128,1,fp8,fp8,0,78.38804321289062
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,96,96,128,1,float16,fp8,0,45.05403442382813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,96,8,128,1,float16,float16,0,93.13111572265625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,96,96,128,1,float16,float16,0,53.50822143554687
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,96,96,128,1,fp8,fp8,0,44.79679870605469
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,96,1,128,1,float16,float16,0,45.792431640625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,96,1,128,1,float16,fp8,0,38.84244079589844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,96,8,128,1,float16,fp8,0,78.95269165039062
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,96,8,128,1,fp8,fp8,0,78.58611450195312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,96,1,128,1,fp8,fp8,0,38.7905517578125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,96,2,128,1,float16,fp8,0,38.818356323242185
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,96,2,128,1,float16,float16,0,45.907595825195315
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,96,2,128,1,fp8,fp8,0,38.64775695800781
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,96,4,128,1,float16,float16,0,46.4114501953125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,96,4,128,1,float16,fp8,0,39.20411376953125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,96,4,128,1,fp8,fp8,0,38.98355407714844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,96,96,128,1,float16,fp8,0,22.18785858154297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,96,96,128,1,float16,float16,0,26.22786865234375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,96,8,128,1,float16,fp8,0,39.44926147460937
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,96,8,128,1,float16,float16,0,46.777853393554686
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,96,96,128,1,fp8,fp8,0,22.4318603515625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,96,1,128,1,float16,float16,0,22.88288116455078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,96,8,128,1,fp8,fp8,0,39.488226318359374
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,96,1,128,1,float16,fp8,0,19.273153686523436
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,96,1,128,1,fp8,fp8,0,19.491108703613282
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,96,2,128,1,float16,float16,0,22.905171203613282
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,96,2,128,1,float16,fp8,0,19.106761169433593
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,96,2,128,1,fp8,fp8,0,19.09351348876953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,96,4,128,1,float16,fp8,0,19.498487854003905
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,96,4,128,1,fp8,fp8,0,19.294189453125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,96,4,128,1,float16,float16,0,23.64232177734375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,96,96,128,1,float16,fp8,0,11.20154266357422
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,96,8,128,1,float16,float16,0,23.813206481933594
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,96,96,128,1,float16,float16,0,13.395123291015626
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,96,8,128,1,float16,fp8,0,19.430941772460937
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,96,96,128,1,fp8,fp8,0,11.12384490966797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,96,8,128,1,fp8,fp8,0,19.36304473876953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,96,1,128,1,float16,float16,0,11.574578857421875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,96,1,128,1,float16,fp8,0,9.60564956665039
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,96,1,128,1,fp8,fp8,0,9.849230194091797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,96,2,128,1,float16,fp8,0,9.652584075927734
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,96,2,128,1,float16,float16,0,11.512891387939453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,96,2,128,1,fp8,fp8,0,9.55167007446289
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,96,4,128,1,float16,fp8,0,9.649209594726562
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,96,4,128,1,fp8,fp8,0,9.740731048583985
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,96,4,128,1,float16,float16,0,12.005760192871094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,96,8,128,1,float16,float16,0,11.550669097900391
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,96,8,128,1,float16,fp8,0,9.668507385253907
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,96,8,128,1,fp8,fp8,0,9.670207977294922
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,96,1,128,1,float16,fp8,0,44.054144287109374
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,96,1,128,1,fp8,fp8,0,44.09410095214844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,96,2,128,1,float16,fp8,0,44.664013671875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,96,2,128,1,fp8,fp8,0,44.65086364746094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,96,1,128,1,float16,float16,0,52.486090087890624
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,96,4,128,1,float16,fp8,0,44.794158935546875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,96,2,128,1,float16,float16,0,52.621392822265626
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,96,4,128,1,float16,float16,0,53.08707275390625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,96,96,128,1,float16,fp8,0,26.708355712890626
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,96,96,128,1,float16,float16,0,31.492068481445312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,96,96,128,1,fp8,fp8,0,26.74539794921875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,96,1,128,1,float16,float16,0,26.258209228515625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,96,4,128,1,fp8,fp8,0,44.52763061523437
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,96,8,128,1,float16,fp8,0,45.34549255371094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,96,8,128,1,fp8,fp8,0,45.34187316894531
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,96,8,128,1,float16,float16,0,53.07661743164063
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,96,1,128,1,float16,fp8,0,22.181629943847657
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,96,1,128,1,fp8,fp8,0,22.05669403076172
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,96,2,128,1,float16,fp8,0,21.83822021484375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,96,2,128,1,float16,float16,0,26.59931640625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,96,2,128,1,fp8,fp8,0,21.96709747314453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,96,4,128,1,float16,fp8,0,22.28380432128906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,96,4,128,1,float16,float16,0,26.786798095703126
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,96,4,128,1,fp8,fp8,0,21.917115783691408
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,96,96,128,1,float16,fp8,0,13.288015747070313
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,96,96,128,1,float16,float16,0,15.531636047363282
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,96,8,128,1,float16,fp8,0,22.716258239746093
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,96,8,128,1,float16,float16,0,26.92015380859375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,96,8,128,1,fp8,fp8,0,22.538787841796875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,96,96,128,1,fp8,fp8,0,13.457569885253907
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,96,1,128,1,float16,float16,0,13.002650451660156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,96,1,128,1,float16,fp8,0,10.811131286621094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,96,1,128,1,fp8,fp8,0,10.853636932373046
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,96,2,128,1,float16,float16,0,13.316047668457031
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,96,2,128,1,float16,fp8,0,11.013196563720703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,96,2,128,1,fp8,fp8,0,10.891524505615234
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,96,4,128,1,float16,fp8,0,11.07076644897461
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,96,4,128,1,float16,float16,0,13.263630676269532
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,96,4,128,1,fp8,fp8,0,11.03306884765625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,96,8,128,1,float16,fp8,0,11.158134460449219
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,96,96,128,1,float16,float16,0,7.825641632080078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,96,8,128,1,float16,float16,0,13.422811889648438
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,96,96,128,1,float16,fp8,0,6.628244781494141
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,96,8,128,1,fp8,fp8,0,11.104547119140625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,96,1,128,1,float16,float16,0,6.423146820068359
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,96,96,128,1,fp8,fp8,0,6.683993530273438
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,96,1,128,1,float16,fp8,0,5.435814285278321
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,96,1,128,1,fp8,fp8,0,5.4025520324707035
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,96,2,128,1,float16,fp8,0,5.437105560302735
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,96,2,128,1,float16,float16,0,6.422376251220703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,96,2,128,1,fp8,fp8,0,5.463056182861328
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,96,4,128,1,float16,fp8,0,5.5258129119873045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,96,4,128,1,fp8,fp8,0,5.59198226928711
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,96,4,128,1,float16,float16,0,6.507038116455078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,96,8,128,1,float16,float16,0,6.662814331054688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,96,8,128,1,float16,fp8,0,5.5058544158935545
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,96,8,128,1,fp8,fp8,0,5.668215942382813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,96,1,128,1,float16,fp8,0,31.23853454589844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,96,1,128,1,fp8,fp8,0,31.0509765625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,96,2,128,1,fp8,fp8,0,30.995794677734374
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,96,2,128,1,float16,fp8,0,31.05399169921875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,96,1,128,1,float16,float16,0,37.1395751953125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,96,2,128,1,float16,float16,0,36.62507629394531
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,96,4,128,1,float16,fp8,0,30.89313659667969
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,96,4,128,1,float16,float16,0,37.148458862304686
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,96,96,128,1,float16,fp8,0,19.23291931152344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,96,96,128,1,fp8,fp8,0,19.453036499023437
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,96,96,128,1,float16,float16,0,22.612339782714844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,96,1,128,1,float16,float16,0,18.307254028320312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,96,4,128,1,fp8,fp8,0,31.47366943359375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,96,8,128,1,float16,fp8,0,31.216647338867187
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,96,8,128,1,fp8,fp8,0,31.647525024414062
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,96,8,128,1,float16,float16,0,37.50881652832031
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,96,1,128,1,float16,fp8,0,15.241270446777344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,96,1,128,1,fp8,fp8,0,15.266162109375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,96,2,128,1,float16,fp8,0,15.400556945800782
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,96,2,128,1,float16,float16,0,18.645791625976564
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,96,2,128,1,fp8,fp8,0,15.649119567871093
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,96,4,128,1,float16,fp8,0,15.694276428222656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,96,4,128,1,float16,float16,0,18.558900451660158
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,96,4,128,1,fp8,fp8,0,15.429107666015625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,96,96,128,1,float16,fp8,0,9.636605072021485
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,96,96,128,1,float16,float16,0,11.191433715820313
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,96,8,128,1,float16,fp8,0,16.14063262939453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,96,8,128,1,fp8,fp8,0,15.62442626953125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,96,8,128,1,float16,float16,0,18.952508544921876
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,96,96,128,1,fp8,fp8,0,9.637248229980468
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,96,1,128,1,float16,fp8,0,7.6079566955566404
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,96,1,128,1,float16,float16,0,8.978984069824218
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,96,1,128,1,fp8,fp8,0,7.714142608642578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,96,2,128,1,float16,fp8,0,7.6679328918457035
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,96,2,128,1,float16,float16,0,9.315408325195312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,96,2,128,1,fp8,fp8,0,7.8887886047363285
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,96,4,128,1,float16,fp8,0,7.690491485595703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,96,4,128,1,float16,float16,0,9.121887969970704
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,96,4,128,1,fp8,fp8,0,7.693459320068359
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,96,8,128,1,float16,float16,0,9.204380798339844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,96,96,128,1,float16,fp8,0,4.842769622802734
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,96,96,128,1,float16,float16,0,5.6614734649658205
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,96,1,128,1,float16,fp8,0,3.827684783935547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,96,96,128,1,fp8,fp8,0,4.845089721679687
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,96,8,128,1,float16,fp8,0,7.912884521484375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,96,1,128,1,float16,float16,0,4.424497604370117
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,96,8,128,1,fp8,fp8,0,7.8315887451171875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,96,1,128,1,fp8,fp8,0,3.850521469116211
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,96,2,128,1,float16,float16,0,4.475476837158203
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,96,2,128,1,float16,fp8,0,3.870479965209961
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,96,2,128,1,fp8,fp8,0,3.869460678100586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,96,4,128,1,float16,fp8,0,3.8768558502197266
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,96,4,128,1,float16,float16,0,4.472483062744141
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,96,4,128,1,fp8,fp8,0,3.9005985260009766
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,96,8,128,1,float16,float16,0,4.560308837890625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,96,8,128,1,float16,fp8,0,3.8761470794677733
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,96,8,128,1,fp8,fp8,0,4.081057739257813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,96,1,128,1,float16,fp8,0,40.436761474609376
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,96,1,128,1,fp8,fp8,0,40.78018798828125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,96,2,128,1,float16,fp8,0,40.4808349609375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,96,2,128,1,fp8,fp8,0,40.8318115234375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,96,4,128,1,float16,fp8,0,40.89548034667969
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,96,1,128,1,float16,float16,0,47.9672119140625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,96,2,128,1,float16,float16,0,48.02159118652344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,96,4,128,1,float16,float16,0,48.091033935546875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,96,1,128,1,float16,float16,0,23.75958251953125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,96,96,128,1,float16,fp8,0,26.320648193359375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,96,96,128,1,fp8,fp8,0,26.317239379882814
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,96,96,128,1,float16,float16,0,30.570440673828124
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,96,4,128,1,fp8,fp8,0,40.99000244140625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,96,8,128,1,float16,fp8,0,41.291253662109376
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,96,8,128,1,fp8,fp8,0,41.6043701171875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,96,8,128,1,float16,float16,0,49.371890258789065
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,96,1,128,1,float16,fp8,0,19.732463073730468
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,96,1,128,1,fp8,fp8,0,20.509541320800782
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,96,2,128,1,float16,fp8,0,19.93824157714844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,96,2,128,1,float16,float16,0,24.20392761230469
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,96,2,128,1,fp8,fp8,0,20.075538635253906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,96,4,128,1,float16,fp8,0,20.296209716796874
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,96,4,128,1,float16,float16,0,23.885223388671875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,96,4,128,1,fp8,fp8,0,20.518536376953126
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,96,96,128,1,float16,float16,0,14.962350463867187
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,96,96,128,1,float16,fp8,0,13.2179931640625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,96,8,128,1,float16,fp8,0,20.617471313476564
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,96,8,128,1,fp8,fp8,0,20.86463317871094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,96,96,128,1,fp8,fp8,0,13.166352844238281
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,96,1,128,1,float16,float16,0,12.087448120117188
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,96,8,128,1,float16,float16,0,24.863031005859376
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,96,1,128,1,float16,fp8,0,10.064411163330078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,96,1,128,1,fp8,fp8,0,9.888833618164062
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,96,2,128,1,float16,fp8,0,10.309671783447266
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,96,2,128,1,fp8,fp8,0,9.941182708740234
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,96,2,128,1,float16,float16,0,12.302836608886718
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,96,4,128,1,float16,fp8,0,10.043577575683594
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,96,4,128,1,float16,float16,0,11.817620849609375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,96,4,128,1,fp8,fp8,0,10.311573028564453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,96,96,128,1,float16,float16,0,7.487017822265625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,96,96,128,1,float16,fp8,0,6.553662109375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,96,96,128,1,fp8,fp8,0,6.599924468994141
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,96,1,128,1,float16,float16,0,5.758537673950196
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,96,8,128,1,float16,fp8,0,10.362446594238282
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,96,8,128,1,float16,float16,0,12.349183654785156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,96,1,128,1,float16,fp8,0,5.004411315917968
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,96,8,128,1,fp8,fp8,0,10.224823760986329
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,96,1,128,1,fp8,fp8,0,4.958035278320312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,96,2,128,1,float16,fp8,0,4.959193420410156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,96,2,128,1,fp8,fp8,0,5.014254379272461
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,96,2,128,1,float16,float16,0,5.872647857666015
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,96,4,128,1,float16,fp8,0,5.059783935546875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,96,4,128,1,fp8,fp8,0,5.108071899414062
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,96,4,128,1,float16,float16,0,6.08275032043457
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,96,8,128,1,float16,float16,0,6.080742263793946
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,96,96,128,1,float16,fp8,0,3.3836830139160154
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,96,96,128,1,float16,float16,0,3.638751983642578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,96,1,128,1,float16,float16,0,2.8297632217407225
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,96,1,128,1,float16,fp8,0,2.6029983520507813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,96,8,128,1,float16,fp8,0,5.081539154052734
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,96,96,128,1,fp8,fp8,0,3.313131332397461
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,96,1,128,1,fp8,fp8,0,2.6373823165893553
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,96,8,128,1,fp8,fp8,0,5.0612129211425785
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,96,2,128,1,float16,float16,0,2.810923194885254
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,96,2,128,1,float16,fp8,0,2.5389120101928713
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,96,2,128,1,fp8,fp8,0,2.66627197265625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,96,4,128,1,float16,float16,0,2.6871376037597656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,96,4,128,1,float16,fp8,0,2.508870315551758
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,96,4,128,1,fp8,fp8,0,2.5145183563232423
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,96,8,128,1,float16,float16,0,2.858705520629883
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,96,8,128,1,float16,fp8,0,2.5393535614013674
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,96,8,128,1,fp8,fp8,0,2.6562400817871095
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,96,1,128,1,float16,fp8,0,23.369664001464844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,96,1,128,1,fp8,fp8,0,23.052748107910155
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,96,2,128,1,float16,fp8,0,22.927587890625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,96,2,128,1,fp8,fp8,0,22.957540893554686
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,96,4,128,1,float16,fp8,0,23.498240661621093
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,96,1,128,1,float16,float16,0,27.12438049316406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,96,2,128,1,float16,float16,0,27.86656799316406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,96,4,128,1,float16,float16,0,27.817095947265624
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,96,1,128,1,float16,float16,0,13.645118713378906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,96,96,128,1,float16,fp8,0,16.36479797363281
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,96,96,128,1,fp8,fp8,0,16.353619384765626
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,96,96,128,1,float16,float16,0,19.405868530273438
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,96,4,128,1,fp8,fp8,0,24.035321044921876
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,96,8,128,1,float16,fp8,0,23.97518310546875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,96,8,128,1,fp8,fp8,0,24.235154724121095
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,96,8,128,1,float16,float16,0,29.059494018554688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,96,1,128,1,float16,fp8,0,11.458070373535156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,96,1,128,1,fp8,fp8,0,11.408689880371094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,96,2,128,1,float16,fp8,0,11.566620635986329
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,96,2,128,1,float16,float16,0,13.66029052734375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,96,2,128,1,fp8,fp8,0,11.711798095703125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,96,4,128,1,float16,fp8,0,11.586196899414062
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,96,4,128,1,float16,float16,0,14.011244201660157
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,96,4,128,1,fp8,fp8,0,11.684718322753906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,96,8,128,1,float16,fp8,0,11.840325164794923
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,96,8,128,1,float16,float16,0,13.925801086425782
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,96,96,128,1,float16,fp8,0,8.202035522460937
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,96,96,128,1,float16,float16,0,9.358975982666015
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,96,1,128,1,float16,float16,0,6.673951721191406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,96,96,128,1,fp8,fp8,0,8.215850830078125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,96,8,128,1,fp8,fp8,0,11.947557067871093
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,96,1,128,1,float16,fp8,0,5.793708801269531
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,96,1,128,1,fp8,fp8,0,5.7890174865722654
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,96,2,128,1,float16,fp8,0,5.815622329711914
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,96,2,128,1,fp8,fp8,0,5.806584167480469
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,96,2,128,1,float16,float16,0,6.778057861328125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,96,4,128,1,float16,fp8,0,5.801065444946289
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,96,4,128,1,fp8,fp8,0,5.908844757080078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,96,4,128,1,float16,float16,0,6.968720245361328
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,96,8,128,1,float16,float16,0,7.399076843261719
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,96,1,128,1,float16,fp8,0,2.9418352127075194
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,96,96,128,1,float16,fp8,0,4.19200325012207
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,96,96,128,1,float16,float16,0,4.583833694458008
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,96,1,128,1,float16,float16,0,3.4442577362060547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,96,8,128,1,float16,fp8,0,5.967955017089844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,96,96,128,1,fp8,fp8,0,4.125049591064453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,96,8,128,1,fp8,fp8,0,5.976916885375976
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,96,1,128,1,fp8,fp8,0,2.9705520629882813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,96,2,128,1,float16,fp8,0,2.8665599822998047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,96,2,128,1,float16,float16,0,3.268067169189453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,96,2,128,1,fp8,fp8,0,2.9049423217773436
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,96,4,128,1,float16,fp8,0,2.910081672668457
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,96,4,128,1,fp8,fp8,0,2.9063743591308593
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,96,4,128,1,float16,float16,0,3.4690624237060548
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,96,8,128,1,float16,float16,0,3.4041152954101563
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,96,8,128,1,float16,fp8,0,2.951807975769043
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,96,96,128,1,float16,float16,0,2.2578399658203123
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,96,96,128,1,float16,fp8,0,2.080512046813965
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,96,1,128,1,float16,float16,0,1.5576064109802246
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,96,1,128,1,float16,fp8,0,1.558948802947998
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,96,96,128,1,fp8,fp8,0,2.0812143325805663
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,96,1,128,1,fp8,fp8,0,1.7071327209472655
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,96,8,128,1,fp8,fp8,0,2.9585615158081056
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,96,2,128,1,float16,float16,0,1.5460816383361817
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,96,2,128,1,float16,fp8,0,1.4480128288269043
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,96,2,128,1,fp8,fp8,0,1.4732928276062012
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,96,4,128,1,float16,float16,0,1.563105583190918
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,96,4,128,1,float16,fp8,0,1.4636336326599122
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,96,4,128,1,fp8,fp8,0,1.4698944091796875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,96,8,128,1,float16,float16,0,1.555720043182373
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,96,8,128,1,float16,fp8,0,1.5766655921936035
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,96,8,128,1,fp8,fp8,0,1.4915184020996093
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,96,1,128,1,fp8,fp8,0,21.298745727539064
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,96,1,128,1,float16,fp8,0,21.639736938476563
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,96,2,128,1,float16,fp8,0,21.56036376953125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,96,2,128,1,fp8,fp8,0,21.788150024414062
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,96,1,128,1,float16,float16,0,25.10318603515625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,96,2,128,1,float16,float16,0,26.000015258789062
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,96,4,128,1,float16,float16,0,25.77220764160156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,96,4,128,1,float16,fp8,0,21.781404113769533
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,96,1,128,1,float16,float16,0,12.60282211303711
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,96,96,128,1,float16,fp8,0,17.330369567871095
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,96,4,128,1,fp8,fp8,0,22.29852294921875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,96,96,128,1,float16,float16,0,19.40642395019531
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,96,96,128,1,fp8,fp8,0,17.359231567382814
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,96,8,128,1,float16,fp8,0,22.80731964111328
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,96,8,128,1,fp8,fp8,0,23.156369018554688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,96,8,128,1,float16,float16,0,27.151959228515626
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,96,1,128,1,float16,fp8,0,10.74166259765625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,96,1,128,1,fp8,fp8,0,10.893142700195312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,96,2,128,1,float16,fp8,0,10.761038208007813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,96,2,128,1,fp8,fp8,0,10.837799835205079
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,96,2,128,1,float16,float16,0,12.70849151611328
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,96,4,128,1,float16,fp8,0,10.996739196777344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,96,4,128,1,float16,float16,0,12.831983947753907
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,96,4,128,1,fp8,fp8,0,10.983833312988281
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,96,1,128,1,float16,float16,0,6.200982284545899
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,96,96,128,1,float16,float16,0,9.372026824951172
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,96,8,128,1,float16,float16,0,13.037751770019531
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,96,1,128,1,float16,fp8,0,5.37457275390625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,96,96,128,1,float16,fp8,0,8.721004486083984
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,96,8,128,1,float16,fp8,0,11.512606048583985
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,96,96,128,1,fp8,fp8,0,8.675665283203125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,96,8,128,1,fp8,fp8,0,11.204865264892579
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,96,1,128,1,fp8,fp8,0,5.465262222290039
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,96,2,128,1,float16,fp8,0,5.4225727081298825
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,96,2,128,1,fp8,fp8,0,5.42783203125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,96,2,128,1,float16,float16,0,6.247043228149414
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,96,4,128,1,float16,fp8,0,5.57249755859375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,96,4,128,1,fp8,fp8,0,5.462380981445312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,96,4,128,1,float16,float16,0,6.345849609375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,96,8,128,1,float16,float16,0,6.51757583618164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,96,1,128,1,float16,float16,0,2.913654327392578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,96,1,128,1,float16,fp8,0,2.8325599670410155
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,96,96,128,1,float16,float16,0,4.650505447387696
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,96,1,128,1,fp8,fp8,0,2.8379087448120117
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,96,96,128,1,float16,fp8,0,4.440726470947266
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,96,8,128,1,float16,fp8,0,5.605398559570313
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,96,96,128,1,fp8,fp8,0,4.3760639190673825
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,96,8,128,1,fp8,fp8,0,5.613383865356445
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,96,2,128,1,float16,fp8,0,2.7295984268188476
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,96,2,128,1,float16,float16,0,3.0295040130615236
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,96,2,128,1,fp8,fp8,0,2.7529264450073243
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,96,4,128,1,float16,fp8,0,2.7500656127929686
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,96,4,128,1,float16,float16,0,3.068110466003418
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,96,4,128,1,fp8,fp8,0,2.7487632751464846
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,96,8,128,1,float16,fp8,0,2.8246992111206053
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,96,8,128,1,float16,float16,0,3.143166351318359
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,96,96,128,1,float16,float16,0,2.3493888854980467
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,96,1,128,1,float16,float16,0,1.4773280143737793
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,96,1,128,1,float16,fp8,0,1.34957275390625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,96,96,128,1,float16,fp8,0,2.202403259277344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,96,8,128,1,fp8,fp8,0,2.820604705810547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,96,1,128,1,fp8,fp8,0,1.3568880081176757
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,96,96,128,1,fp8,fp8,0,2.202126312255859
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,96,2,128,1,float16,float16,0,1.5143312454223632
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,96,2,128,1,float16,fp8,0,1.3851840019226074
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,96,2,128,1,fp8,fp8,0,1.4135583877563476
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,96,4,128,1,float16,float16,0,1.479364776611328
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,96,4,128,1,float16,fp8,0,1.3923215866088867
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,96,4,128,1,fp8,fp8,0,1.3958640098571777
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,96,8,128,1,float16,float16,0,1.4923456192016602
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,96,8,128,1,float16,fp8,0,1.4285136222839356
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,96,96,128,1,float16,float16,0,1.1709407806396483
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,96,8,128,1,fp8,fp8,0,1.4777152061462402
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,96,96,128,1,fp8,fp8,0,1.15447998046875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,96,96,128,1,float16,fp8,0,1.1178624153137207
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,96,1,128,1,float16,float16,0,0.7353903770446777
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,96,1,128,1,float16,fp8,0,0.6946208000183105
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,96,1,128,1,fp8,fp8,0,0.7548128128051758
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,96,2,128,1,float16,float16,0,0.7375664234161377
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,96,2,128,1,float16,fp8,0,0.6997024059295655
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,96,2,128,1,fp8,fp8,0,0.7012735843658447
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,96,4,128,1,float16,float16,0,0.7431375980377197
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,96,4,128,1,float16,fp8,0,0.7089168071746826
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,96,4,128,1,fp8,fp8,0,0.7262015819549561
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,96,8,128,1,float16,float16,0,0.7528063774108886
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,96,8,128,1,float16,fp8,0,0.7295135974884033
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,96,8,128,1,fp8,fp8,0,0.7248095989227294
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,96,1,128,1,float16,fp8,0,12.822592163085938
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,96,1,128,1,fp8,fp8,0,12.633478546142578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,96,2,128,1,float16,fp8,0,12.801800537109376
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,96,2,128,1,fp8,fp8,0,12.756966400146485
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,96,1,128,1,float16,float16,0,14.776339721679687
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,96,2,128,1,float16,float16,0,14.745413208007813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,96,4,128,1,float16,float16,0,15.178517150878907
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,96,4,128,1,float16,fp8,0,12.942672729492188
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,96,4,128,1,fp8,fp8,0,13.111714172363282
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,96,96,128,1,float16,fp8,0,11.436315155029297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,96,96,128,1,float16,float16,0,12.137583923339843
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,96,8,128,1,float16,fp8,0,13.435992431640624
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,96,8,128,1,fp8,fp8,0,13.351300048828126
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,96,8,128,1,float16,float16,0,15.388877868652344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,96,96,128,1,fp8,fp8,0,11.429289245605469
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,96,1,128,1,float16,float16,0,7.271238708496094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,96,1,128,1,float16,fp8,0,6.440756988525391
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,96,1,128,1,fp8,fp8,0,6.320111846923828
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,96,2,128,1,float16,fp8,0,6.455599975585938
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,96,2,128,1,fp8,fp8,0,6.35479850769043
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,96,2,128,1,float16,float16,0,7.776834869384766
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,96,4,128,1,float16,fp8,0,6.499820709228516
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,96,4,128,1,float16,float16,0,8.165984344482421
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,96,4,128,1,fp8,fp8,0,6.542305755615234
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,96,1,128,1,float16,float16,0,3.6428993225097654
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,96,8,128,1,float16,fp8,0,6.732281494140625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,96,96,128,1,float16,float16,0,5.959423828125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,96,8,128,1,float16,float16,0,8.140151977539062
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,96,8,128,1,fp8,fp8,0,6.737852478027344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,96,96,128,1,fp8,fp8,0,5.725640106201172
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,96,96,128,1,float16,fp8,0,5.7442272186279295
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,96,1,128,1,float16,fp8,0,3.188287925720215
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,96,1,128,1,fp8,fp8,0,3.2861553192138673
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,96,2,128,1,float16,fp8,0,3.2324752807617188
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,96,2,128,1,float16,float16,0,3.5928478240966797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,96,2,128,1,fp8,fp8,0,3.3155696868896483
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,96,4,128,1,float16,fp8,0,3.273700714111328
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,96,4,128,1,float16,float16,0,3.7167808532714846
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,96,4,128,1,fp8,fp8,0,3.2501792907714844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,96,8,128,1,float16,float16,0,3.7359359741210936
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,96,1,128,1,float16,fp8,0,1.6027519226074218
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,96,1,128,1,float16,float16,0,1.7965232849121093
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,96,8,128,1,float16,fp8,0,3.375273513793945
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,96,96,128,1,float16,float16,0,3.039913558959961
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,96,96,128,1,float16,fp8,0,2.886961555480957
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,96,8,128,1,fp8,fp8,0,3.376496124267578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,96,1,128,1,fp8,fp8,0,1.6893104553222655
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,96,96,128,1,fp8,fp8,0,2.876251220703125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,96,2,128,1,float16,float16,0,1.729150390625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,96,2,128,1,float16,fp8,0,1.681870460510254
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,96,2,128,1,fp8,fp8,0,1.6222879409790039
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,96,4,128,1,float16,float16,0,1.7409456253051758
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,96,4,128,1,float16,fp8,0,1.6483760833740235
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,96,4,128,1,fp8,fp8,0,1.6517984390258789
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,96,8,128,1,float16,float16,0,1.7989631652832032
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,96,8,128,1,float16,fp8,0,1.702516746520996
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,96,96,128,1,float16,float16,0,1.517193603515625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,96,8,128,1,fp8,fp8,0,1.7445199966430665
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,96,1,128,1,float16,fp8,0,0.8180959701538086
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,96,1,128,1,fp8,fp8,0,0.8286992073059082
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,96,2,128,1,float16,fp8,0,0.8256896018981934
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,96,1,128,1,float16,float16,0,0.849403190612793
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,96,96,128,1,float16,fp8,0,1.4517711639404296
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,96,2,128,1,float16,float16,0,0.8640463829040528
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,96,2,128,1,fp8,fp8,0,0.8215215682983399
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,96,96,128,1,fp8,fp8,0,1.463987159729004
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,96,4,128,1,float16,float16,0,0.8824543952941895
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,96,4,128,1,float16,fp8,0,0.8404687881469727
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,96,4,128,1,fp8,fp8,0,0.8386159896850586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,96,8,128,1,float16,float16,0,0.8965264320373535
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,96,8,128,1,fp8,fp8,0,0.8650848388671875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,96,8,128,1,float16,fp8,0,0.8595215797424316
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,96,96,128,1,float16,float16,0,0.7742608070373536
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,96,96,128,1,float16,fp8,0,0.7389215946197509
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,96,1,128,1,float16,float16,0,0.44174880981445314
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,96,96,128,1,fp8,fp8,0,0.7334320068359375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,96,1,128,1,float16,fp8,0,0.42289438247680666
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,96,1,128,1,fp8,fp8,0,0.42386879920959475
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,96,2,128,1,float16,float16,0,0.4425648212432861
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,96,2,128,1,float16,fp8,0,0.42200322151184083
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,96,2,128,1,fp8,fp8,0,0.4224063873291016
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,96,4,128,1,float16,float16,0,0.45548319816589355
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,96,4,128,1,float16,fp8,0,0.43212318420410156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,96,4,128,1,fp8,fp8,0,0.42914562225341796
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,96,8,128,1,float16,float16,0,0.4628592014312744
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,96,8,128,1,float16,fp8,0,0.44327521324157715
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,96,8,128,1,fp8,fp8,0,0.4429008007049561
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,96,1,128,1,float16,fp8,0,12.342683410644531
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,96,1,128,1,fp8,fp8,0,12.388616180419922
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,96,2,128,1,float16,fp8,0,12.548792266845703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,96,2,128,1,fp8,fp8,0,12.545591735839844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,96,1,128,1,float16,float16,0,14.083688354492187
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,96,2,128,1,float16,float16,0,14.30316162109375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,96,4,128,1,float16,fp8,0,12.853953552246093
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,96,4,128,1,float16,float16,0,14.788862609863282
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,96,1,128,1,float16,float16,0,7.121043395996094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,96,4,128,1,fp8,fp8,0,12.811541748046874
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,96,8,128,1,float16,fp8,0,13.405708312988281
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,96,8,128,1,fp8,fp8,0,13.388601684570313
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,96,96,128,1,float16,float16,0,13.277110290527343
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,96,96,128,1,float16,fp8,0,12.971897888183594
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,96,96,128,1,fp8,fp8,0,12.893255615234375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,96,8,128,1,float16,float16,0,15.131990051269531
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,96,1,128,1,float16,fp8,0,6.253939056396485
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,96,1,128,1,fp8,fp8,0,6.225543975830078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,96,2,128,1,float16,fp8,0,6.3239185333251955
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,96,2,128,1,float16,float16,0,7.1117713928222654
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,96,2,128,1,fp8,fp8,0,6.2950897216796875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,96,4,128,1,float16,fp8,0,6.546886444091797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,96,4,128,1,float16,float16,0,7.347177886962891
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,96,4,128,1,fp8,fp8,0,6.546537780761719
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,96,1,128,1,float16,float16,0,3.4435775756835936
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,96,8,128,1,float16,float16,0,7.85113754272461
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,96,1,128,1,float16,fp8,0,3.1370288848876955
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,96,8,128,1,float16,fp8,0,6.698299407958984
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,96,8,128,1,fp8,fp8,0,6.752361297607422
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,96,96,128,1,float16,fp8,0,6.457003021240235
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,96,96,128,1,float16,float16,0,6.652009582519531
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,96,96,128,1,fp8,fp8,0,6.499699401855469
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,96,1,128,1,fp8,fp8,0,3.190127944946289
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,96,2,128,1,float16,float16,0,3.5210239410400392
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,96,2,128,1,float16,fp8,0,3.169193649291992
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,96,2,128,1,fp8,fp8,0,3.1550176620483397
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,96,4,128,1,float16,float16,0,3.5518848419189455
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,96,4,128,1,float16,fp8,0,3.2204334259033205
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,96,4,128,1,fp8,fp8,0,3.249729537963867
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,96,8,128,1,float16,float16,0,3.6844112396240236
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,96,8,128,1,float16,fp8,0,3.382921600341797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,96,8,128,1,fp8,fp8,0,3.398049545288086
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,96,96,128,1,float16,float16,0,3.373369598388672
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,96,1,128,1,float16,float16,0,1.642892837524414
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,96,1,128,1,float16,fp8,0,1.584864044189453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,96,96,128,1,float16,fp8,0,3.2445472717285155
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,96,1,128,1,fp8,fp8,0,1.653099250793457
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,96,2,128,1,float16,float16,0,1.676335906982422
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,96,96,128,1,fp8,fp8,0,3.3466705322265624
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,96,2,128,1,float16,fp8,0,1.5940064430236816
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,96,2,128,1,fp8,fp8,0,1.6039007186889649
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,96,4,128,1,float16,float16,0,1.7329984664916993
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,96,4,128,1,float16,fp8,0,1.6437040328979493
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,96,4,128,1,fp8,fp8,0,1.6284128189086915
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,96,8,128,1,float16,float16,0,1.7899696350097656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,96,8,128,1,float16,fp8,0,1.7607551574707032
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,96,8,128,1,fp8,fp8,0,1.7029808044433594
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,96,1,128,1,float16,float16,0,0.8359871864318847
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,96,96,128,1,float16,float16,0,1.7003007888793946
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,96,1,128,1,float16,fp8,0,0.8011407852172852
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,96,96,128,1,float16,fp8,0,1.6454736709594726
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,96,1,128,1,fp8,fp8,0,0.8355728149414062
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,96,96,128,1,fp8,fp8,0,1.6643360137939454
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,96,2,128,1,float16,float16,0,0.8404144287109375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,96,2,128,1,float16,fp8,0,0.8139792442321777
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,96,2,128,1,fp8,fp8,0,0.8085984230041504
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,96,4,128,1,float16,float16,0,0.859670352935791
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,96,4,128,1,float16,fp8,0,0.8277104377746582
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,96,4,128,1,fp8,fp8,0,0.829918384552002
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,96,8,128,1,float16,float16,0,0.907436752319336
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,96,8,128,1,float16,fp8,0,0.867091178894043
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,96,8,128,1,fp8,fp8,0,0.8652400016784668
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,96,96,128,1,float16,float16,0,0.8586079597473144
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,96,96,128,1,float16,fp8,0,0.833193588256836
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,96,1,128,1,float16,float16,0,0.42429280281066895
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,96,96,128,1,fp8,fp8,0,0.8384511947631836
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,96,1,128,1,float16,fp8,0,0.412608003616333
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,96,1,128,1,fp8,fp8,0,0.4131008148193359
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,96,2,128,1,float16,float16,0,0.43416638374328614
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,96,2,128,1,float16,fp8,0,0.4201183795928955
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,96,2,128,1,fp8,fp8,0,0.421998405456543
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,96,4,128,1,float16,float16,0,0.44130558967590333
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,96,4,128,1,float16,fp8,0,0.4258848190307617
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,96,4,128,1,fp8,fp8,0,0.42668957710266114
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,96,8,128,1,float16,float16,0,0.45801281929016113
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,96,8,128,1,float16,fp8,0,0.4465536117553711
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,96,8,128,1,fp8,fp8,0,0.44759359359741213
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,96,96,128,1,float16,float16,0,0.44383039474487307
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,96,96,128,1,float16,fp8,0,0.43152318000793455
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,96,96,128,1,fp8,fp8,0,0.42911200523376464
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,96,1,128,1,float16,float16,0,0.22681760787963867
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,96,1,128,1,float16,fp8,0,0.218558406829834
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,96,1,128,1,fp8,fp8,0,0.22019999027252196
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,96,2,128,1,float16,float16,0,0.22927680015563964
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,96,8,128,1,float16,float16,0,0.2430255889892578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,96,2,128,1,float16,fp8,0,0.22218880653381348
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,96,2,128,1,fp8,fp8,0,0.2216320037841797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,96,4,128,1,float16,float16,0,0.2342736005783081
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,96,4,128,1,float16,fp8,0,0.22630081176757813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,96,4,128,1,fp8,fp8,0,0.22695519924163818
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,96,8,128,1,float16,fp8,0,0.233404803276062
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,96,8,128,1,fp8,fp8,0,0.23608479499816895
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,96,1,128,1,float16,fp8,0,7.6797630310058596
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,96,1,128,1,fp8,fp8,0,7.660793304443359
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,96,1,128,1,float16,float16,0,8.514895629882812
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,96,2,128,1,float16,fp8,0,7.771678161621094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,96,2,128,1,fp8,fp8,0,7.781918334960937
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,96,2,128,1,float16,float16,0,8.652710723876954
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,96,4,128,1,float16,fp8,0,8.015144348144531
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,96,4,128,1,float16,float16,0,8.892726135253906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,96,1,128,1,float16,float16,0,4.226480102539062
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,96,4,128,1,fp8,fp8,0,8.012057495117187
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,96,8,128,1,float16,fp8,0,8.44632797241211
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,96,8,128,1,fp8,fp8,0,8.423155212402344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,96,8,128,1,float16,float16,0,9.294064331054688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,96,96,128,1,float16,float16,0,9.022724914550782
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,96,96,128,1,float16,fp8,0,8.938702392578126
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,96,96,128,1,fp8,fp8,0,8.893214416503906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,96,1,128,1,float16,fp8,0,3.8508609771728515
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,96,1,128,1,fp8,fp8,0,3.8587871551513673
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,96,2,128,1,float16,float16,0,4.1899566650390625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,96,2,128,1,float16,fp8,0,3.946521759033203
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,96,2,128,1,fp8,fp8,0,3.8955039978027344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,96,4,128,1,float16,float16,0,4.405174255371094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,96,4,128,1,float16,fp8,0,4.0217632293701175
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,96,4,128,1,fp8,fp8,0,4.023926544189453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,96,1,128,1,float16,float16,0,1.9870624542236328
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,96,8,128,1,float16,float16,0,4.600711822509766
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,96,8,128,1,float16,fp8,0,4.276136016845703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,96,8,128,1,fp8,fp8,0,4.2406352996826175
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,96,1,128,1,float16,fp8,0,1.937883186340332
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,96,96,128,1,float16,float16,0,4.524382400512695
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,96,96,128,1,float16,fp8,0,4.488739013671875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,96,96,128,1,fp8,fp8,0,4.4583984375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,96,1,128,1,fp8,fp8,0,1.9895008087158204
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,96,2,128,1,float16,float16,0,2.0851184844970705
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,96,2,128,1,float16,fp8,0,1.9775823593139648
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,96,2,128,1,fp8,fp8,0,1.9765504837036132
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,96,4,128,1,float16,float16,0,2.124731254577637
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,96,4,128,1,float16,fp8,0,2.011926460266113
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,96,4,128,1,fp8,fp8,0,2.0261503219604493
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,96,8,128,1,float16,float16,0,2.226089668273926
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,96,8,128,1,float16,fp8,0,2.1314672470092773
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,96,8,128,1,fp8,fp8,0,2.1615327835083007
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,96,1,128,1,float16,float16,0,0.9995375633239746
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,96,1,128,1,float16,fp8,0,0.9853232383728028
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,96,96,128,1,float16,float16,0,2.285476875305176
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,96,96,128,1,float16,fp8,0,2.240675163269043
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,96,1,128,1,fp8,fp8,0,1.0215503692626953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,96,96,128,1,fp8,fp8,0,2.296345520019531
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,96,2,128,1,float16,float16,0,1.0237296104431153
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,96,2,128,1,float16,fp8,0,0.9963135719299316
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,96,2,128,1,fp8,fp8,0,0.9932127952575683
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,96,4,128,1,float16,float16,0,1.056492805480957
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,96,4,128,1,float16,fp8,0,1.0276783943176269
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,96,4,128,1,fp8,fp8,0,1.014577579498291
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,96,8,128,1,float16,float16,0,1.1125328063964843
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,96,8,128,1,float16,fp8,0,1.079315185546875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,96,8,128,1,fp8,fp8,0,1.0794464111328126
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,96,96,128,1,float16,float16,0,1.164187240600586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,96,1,128,1,float16,float16,0,0.5164591789245605
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,96,96,128,1,float16,fp8,0,1.1360383987426759
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,96,1,128,1,float16,fp8,0,0.49640960693359376
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,96,96,128,1,fp8,fp8,0,1.1556464195251466
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,96,1,128,1,fp8,fp8,0,0.5078400135040283
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,96,2,128,1,float16,float16,0,0.5226304054260253
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,96,2,128,1,float16,fp8,0,0.5125167846679688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,96,2,128,1,fp8,fp8,0,0.5104464054107666
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,96,4,128,1,float16,float16,0,0.5360976219177246
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,96,4,128,1,float16,fp8,0,0.5233071804046631
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,96,4,128,1,fp8,fp8,0,0.5173984050750733
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,96,8,128,1,float16,float16,0,0.5666128158569336
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,96,8,128,1,float16,fp8,0,0.5516016006469726
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,96,8,128,1,fp8,fp8,0,0.5513487815856933
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,96,96,128,1,float16,float16,0,0.5929952144622803
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,96,96,128,1,float16,fp8,0,0.5851471900939942
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,96,1,128,1,float16,float16,0,0.26949760913848875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,96,96,128,1,fp8,fp8,0,0.5807519912719726
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,96,1,128,1,float16,fp8,0,0.2636672019958496
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,96,1,128,1,fp8,fp8,0,0.2644752025604248
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,96,2,128,1,float16,float16,0,0.27370400428771974
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,96,2,128,1,float16,fp8,0,0.266811203956604
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,96,2,128,1,fp8,fp8,0,0.2664383888244629
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,96,4,128,1,float16,float16,0,0.2798448085784912
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,96,4,128,1,float16,fp8,0,0.2722464084625244
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,96,4,128,1,fp8,fp8,0,0.27269759178161623
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,96,8,128,1,float16,float16,0,0.2936592102050781
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,96,8,128,1,float16,fp8,0,0.28712799549102785
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,96,1,128,1,float16,fp8,0,0.14551520347595215
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,96,1,128,1,fp8,fp8,0,0.14490400552749633
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,96,8,128,1,fp8,fp8,0,0.28819200992584226
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,96,96,128,1,float16,float16,0,0.3138927936553955
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,96,96,128,1,float16,fp8,0,0.306441593170166
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,96,96,128,1,fp8,fp8,0,0.30543038845062254
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,96,1,128,1,float16,float16,0,0.14828319549560548
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,96,2,128,1,float16,float16,0,0.1498751997947693
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,96,2,128,1,float16,fp8,0,0.1455888032913208
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,96,2,128,1,fp8,fp8,0,0.1454800009727478
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,96,4,128,1,float16,float16,0,0.15106240510940552
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,96,4,128,1,float16,fp8,0,0.14633760452270508
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,96,4,128,1,fp8,fp8,0,0.14665119647979735
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,96,8,128,1,float16,float16,0,0.15728960037231446
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,96,8,128,1,float16,fp8,0,0.155076801776886
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,96,8,128,1,fp8,fp8,0,0.15556639432907104
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,96,1,128,1,float16,fp8,0,8.012918090820312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,96,1,128,1,fp8,fp8,0,8.049428558349609
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,96,1,128,1,float16,float16,0,8.495075225830078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,96,2,128,1,float16,float16,0,8.71661605834961
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,96,2,128,1,float16,fp8,0,8.165760040283203
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,96,2,128,1,fp8,fp8,0,8.14034423828125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,96,4,128,1,float16,fp8,0,8.472795104980468
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,96,4,128,1,float16,float16,0,8.989344024658203
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,96,4,128,1,fp8,fp8,0,8.426161956787109
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,96,8,128,1,float16,fp8,0,9.100761413574219
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,96,8,128,1,float16,float16,0,9.56153564453125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,96,8,128,1,fp8,fp8,0,9.102918243408203
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,96,96,128,1,float16,float16,0,10.899336242675782
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,96,96,128,1,float16,fp8,0,10.727174377441406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,96,1,128,1,float16,float16,0,4.260017776489258
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,96,1,128,1,float16,fp8,0,4.046047973632812
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,96,96,128,1,fp8,fp8,0,10.820465850830079
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,96,1,128,1,fp8,fp8,0,4.072808074951172
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,96,2,128,1,float16,float16,0,4.249723052978515
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,96,2,128,1,float16,fp8,0,4.125780868530273
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,96,2,128,1,fp8,fp8,0,4.091849517822266
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,96,4,128,1,float16,float16,0,4.435201644897461
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,96,4,128,1,float16,fp8,0,4.288483047485352
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,96,4,128,1,fp8,fp8,0,4.252431869506836
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,96,8,128,1,float16,fp8,0,4.560443115234375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,96,8,128,1,float16,float16,0,4.722129440307617
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,96,8,128,1,fp8,fp8,0,4.577337646484375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,96,1,128,1,float16,float16,0,2.068332862854004
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,96,96,128,1,float16,float16,0,5.4599567413330075
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,96,1,128,1,float16,fp8,0,2.031241607666016
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,96,1,128,1,fp8,fp8,0,2.0503471374511717
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,96,2,128,1,float16,float16,0,2.1165103912353516
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,96,2,128,1,float16,fp8,0,2.0763599395751955
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,96,96,128,1,float16,fp8,0,5.381835174560547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,96,2,128,1,fp8,fp8,0,2.095175933837891
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,96,4,128,1,float16,float16,0,2.172844886779785
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,96,96,128,1,fp8,fp8,0,5.460220718383789
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,96,4,128,1,float16,fp8,0,2.141681671142578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,96,4,128,1,fp8,fp8,0,2.1401744842529298
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,96,8,128,1,float16,float16,0,2.340380859375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,96,8,128,1,float16,fp8,0,2.2878864288330076
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,96,1,128,1,float16,float16,0,1.0388336181640625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,96,8,128,1,fp8,fp8,0,2.2678991317749024
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,96,1,128,1,float16,fp8,0,1.0242735862731933
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,96,96,128,1,float16,float16,0,2.7519935607910155
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,96,1,128,1,fp8,fp8,0,1.0439760208129882
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,96,96,128,1,float16,fp8,0,2.753630447387695
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,96,2,128,1,float16,float16,0,1.0585103988647462
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,96,2,128,1,fp8,fp8,0,1.0399680137634277
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,96,96,128,1,fp8,fp8,0,2.719228744506836
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,96,2,128,1,float16,fp8,0,1.0444416046142577
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,96,4,128,1,float16,float16,0,1.089459228515625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,96,4,128,1,fp8,fp8,0,1.0863903999328612
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,96,4,128,1,float16,fp8,0,1.08439359664917
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,96,8,128,1,float16,float16,0,1.1730688095092774
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,96,8,128,1,fp8,fp8,0,1.1435471534729005
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,96,8,128,1,float16,fp8,0,1.148911952972412
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,96,1,128,1,float16,float16,0,0.5341807842254639
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,96,96,128,1,float16,float16,0,1.3904800415039062
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,96,1,128,1,float16,fp8,0,0.5281599998474121
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,96,1,128,1,fp8,fp8,0,0.5300687789916992
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,96,96,128,1,float16,fp8,0,1.3671279907226563
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,96,96,128,1,fp8,fp8,0,1.3655424118041992
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,96,2,128,1,float16,float16,0,0.5341648101806641
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,96,2,128,1,float16,fp8,0,0.5339151859283447
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,96,2,128,1,fp8,fp8,0,0.538812780380249
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,96,4,128,1,float16,float16,0,0.556824016571045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,96,4,128,1,float16,fp8,0,0.5545760154724121
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,96,4,128,1,fp8,fp8,0,0.5561056137084961
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,96,8,128,1,float16,float16,0,0.5946112155914307
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,96,8,128,1,float16,fp8,0,0.5862768173217774
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,96,8,128,1,fp8,fp8,0,0.5833439826965332
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,96,96,128,1,float16,float16,0,0.7065199851989746
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,96,96,128,1,float16,fp8,0,0.7031631946563721
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,96,1,128,1,float16,float16,0,0.2760544061660767
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,96,1,128,1,float16,fp8,0,0.2755840063095093
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,96,96,128,1,fp8,fp8,0,0.6941967964172363
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,96,1,128,1,fp8,fp8,0,0.27376160621643064
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,96,2,128,1,float16,float16,0,0.28262081146240237
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,96,2,128,1,float16,fp8,0,0.2784960031509399
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,96,2,128,1,fp8,fp8,0,0.27964320182800295
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,96,4,128,1,float16,float16,0,0.292795205116272
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,96,4,128,1,float16,fp8,0,0.28694400787353513
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,96,4,128,1,fp8,fp8,0,0.2895456075668335
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,96,8,128,1,float16,float16,0,0.3078943967819214
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,96,8,128,1,float16,fp8,0,0.3038975954055786
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,96,8,128,1,fp8,fp8,0,0.30499680042266847
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,96,96,128,1,float16,float16,0,0.3658704042434692
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,96,2,128,1,float16,float16,0,0.15141119956970214
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,96,96,128,1,float16,fp8,0,0.35934879779815676
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,96,1,128,1,float16,float16,0,0.15132960081100463
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,96,96,128,1,fp8,fp8,0,0.36292641162872313
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,96,1,128,1,float16,fp8,0,0.15204800367355348
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,96,1,128,1,fp8,fp8,0,0.14990400075912474
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,96,2,128,1,float16,fp8,0,0.1503167986869812
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,96,2,128,1,fp8,fp8,0,0.14965759515762328
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,96,8,128,1,fp8,fp8,0,0.16398240327835084
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,96,4,128,1,float16,float16,0,0.1580464005470276
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,96,4,128,1,float16,fp8,0,0.15558240413665772
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,96,4,128,1,fp8,fp8,0,0.15577759742736816
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,96,8,128,1,float16,float16,0,0.1662495970726013
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,96,8,128,1,float16,fp8,0,0.16309599876403807
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,96,96,128,1,float16,float16,0,0.19556959867477416
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,96,96,128,1,float16,fp8,0,0.19133280515670775
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,96,96,128,1,fp8,fp8,0,0.19190560579299926
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,96,1,128,1,float16,float16,0,0.0871775984764099
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,96,1,128,1,float16,fp8,0,0.0823087990283966
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,96,1,128,1,fp8,fp8,0,0.08299840092658997
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,96,2,128,1,float16,float16,0,0.0871504008769989
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,96,2,128,1,float16,fp8,0,0.08357440233230591
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,96,2,128,1,fp8,fp8,0,0.08254719972610473
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,96,4,128,1,float16,float16,0,0.08884959816932678
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,96,4,128,1,float16,fp8,0,0.08570399880409241
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,96,4,128,1,fp8,fp8,0,0.08421440124511718
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,96,8,128,1,float16,float16,0,0.09524800181388855
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,96,8,128,1,float16,fp8,0,0.09049760103225708
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,96,8,128,1,fp8,fp8,0,0.09172000288963318
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,96,1,128,1,float16,float16,0,5.796478271484375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,96,1,128,1,float16,fp8,0,5.8779041290283205
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,96,1,128,1,fp8,fp8,0,5.89958381652832
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,96,2,128,1,float16,float16,0,5.930977630615234
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,96,2,128,1,float16,fp8,0,6.043921661376953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,96,2,128,1,fp8,fp8,0,6.044627380371094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,96,4,128,1,float16,float16,0,6.208905410766602
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,96,4,128,1,float16,fp8,0,6.326851272583008
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,96,4,128,1,fp8,fp8,0,6.377051162719726
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,96,8,128,1,float16,float16,0,6.856412506103515
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,96,1,128,1,float16,float16,0,2.880284881591797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,96,8,128,1,float16,fp8,0,6.865805053710938
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,96,8,128,1,fp8,fp8,0,6.947196960449219
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,96,1,128,1,float16,fp8,0,2.9786352157592773
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,96,96,128,1,float16,float16,0,9.684588623046874
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,96,1,128,1,fp8,fp8,0,2.978868865966797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,96,2,128,1,float16,float16,0,2.9691728591918944
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,96,2,128,1,float16,fp8,0,3.017635154724121
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,96,96,128,1,float16,fp8,0,9.734982299804688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,96,2,128,1,fp8,fp8,0,3.0587615966796875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,96,96,128,1,fp8,fp8,0,9.7781982421875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,96,4,128,1,float16,float16,0,3.0916255950927733
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,96,4,128,1,float16,fp8,0,3.177463912963867
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,96,4,128,1,fp8,fp8,0,3.1909887313842775
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,96,8,128,1,float16,float16,0,3.3989295959472656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,96,8,128,1,float16,fp8,0,3.437963104248047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,96,8,128,1,fp8,fp8,0,3.4747646331787108
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,96,1,128,1,float16,float16,0,1.4472288131713866
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,96,1,128,1,float16,fp8,0,1.5032575607299805
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,96,96,128,1,float16,float16,0,4.851286315917969
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,96,1,128,1,fp8,fp8,0,1.5038175582885742
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,96,2,128,1,float16,float16,0,1.4748064041137696
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,96,2,128,1,float16,fp8,0,1.5314623832702636
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,96,96,128,1,float16,fp8,0,4.878915023803711
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,96,2,128,1,fp8,fp8,0,1.5293024063110352
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,96,4,128,1,float16,float16,0,1.5618144035339356
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,96,96,128,1,fp8,fp8,0,4.903107070922852
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,96,4,128,1,float16,fp8,0,1.5960000038146973
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,96,4,128,1,fp8,fp8,0,1.6065935134887694
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,96,8,128,1,float16,float16,0,1.692804718017578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,96,8,128,1,float16,fp8,0,1.7447200775146485
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,96,8,128,1,fp8,fp8,0,1.7426944732666017
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,96,1,128,1,float16,float16,0,0.7347055912017822
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,96,1,128,1,float16,fp8,0,0.7607103824615479
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,96,96,128,1,float16,float16,0,2.443198394775391
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,96,1,128,1,fp8,fp8,0,0.7504528045654297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,96,2,128,1,float16,float16,0,0.7562880039215087
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,96,96,128,1,float16,fp8,0,2.464039993286133
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,96,96,128,1,fp8,fp8,0,2.464926338195801
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,96,2,128,1,float16,fp8,0,0.7755680084228516
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,96,2,128,1,fp8,fp8,0,0.7753600120544434
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,96,4,128,1,float16,float16,0,0.7929488182067871
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,96,4,128,1,float16,fp8,0,0.8097727775573731
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,96,4,128,1,fp8,fp8,0,0.8053631782531738
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,96,8,128,1,float16,float16,0,0.8664079666137695
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,96,8,128,1,float16,fp8,0,0.8858160018920899
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,96,8,128,1,fp8,fp8,0,0.8885807991027832
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,96,96,128,1,float16,float16,0,1.2397040367126464
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,96,1,128,1,float16,float16,0,0.38027200698852537
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,96,1,128,1,float16,fp8,0,0.3884687900543213
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,96,96,128,1,float16,fp8,0,1.2366399765014648
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,96,1,128,1,fp8,fp8,0,0.3908639907836914
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,96,96,128,1,fp8,fp8,0,1.2450400352478028
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,96,2,128,1,float16,float16,0,0.3893599987030029
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,96,2,128,1,float16,fp8,0,0.39993278980255126
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,96,2,128,1,fp8,fp8,0,0.39915359020233154
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,96,4,128,1,float16,float16,0,0.40653600692749026
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,96,4,128,1,float16,fp8,0,0.41440157890319823
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,96,4,128,1,fp8,fp8,0,0.4159103870391846
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,96,8,128,1,float16,float16,0,0.445308780670166
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,96,8,128,1,float16,fp8,0,0.4511807918548584
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,96,8,128,1,fp8,fp8,0,0.4533247947692871
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,96,96,128,1,float16,float16,0,0.6320511817932128
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,96,96,128,1,float16,fp8,0,0.6316895961761475
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,96,1,128,1,float16,float16,0,0.2004240036010742
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,96,96,128,1,fp8,fp8,0,0.6308464050292969
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,96,4,128,1,float16,float16,0,0.21536319255828856
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,96,1,128,1,float16,fp8,0,0.20571680068969728
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,96,1,128,1,fp8,fp8,0,0.2057647943496704
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,96,2,128,1,float16,float16,0,0.20630879402160646
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,96,2,128,1,float16,fp8,0,0.21189439296722412
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,96,2,128,1,fp8,fp8,0,0.21125600337982178
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,96,4,128,1,float16,fp8,0,0.21895039081573486
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,96,4,128,1,fp8,fp8,0,0.2197279930114746
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,96,8,128,1,float16,float16,0,0.23459360599517823
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,96,8,128,1,float16,fp8,0,0.2362207889556885
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,96,8,128,1,fp8,fp8,0,0.23708479404449462
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,96,96,128,1,float16,float16,0,0.32891039848327636
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,96,96,128,1,float16,fp8,0,0.32725598812103274
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,96,1,128,1,float16,float16,0,0.11210880279541016
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,96,96,128,1,fp8,fp8,0,0.3279695987701416
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,96,4,128,1,float16,float16,0,0.11796799898147584
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,96,1,128,1,float16,fp8,0,0.11514719724655151
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,96,1,128,1,fp8,fp8,0,0.11516480445861817
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,96,2,128,1,float16,float16,0,0.11235519647598266
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,96,2,128,1,float16,fp8,0,0.11579840183258057
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,96,2,128,1,fp8,fp8,0,0.11616959571838378
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,96,4,128,1,float16,fp8,0,0.12185920476913452
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,96,4,128,1,fp8,fp8,0,0.12042399644851684
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,96,8,128,1,float16,float16,0,0.12785760164260865
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,96,8,128,1,float16,fp8,0,0.12904319763183594
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,96,8,128,1,fp8,fp8,0,0.12861440181732178
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,96,96,128,1,float16,float16,0,0.17688000202178955
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,96,96,128,1,float16,fp8,0,0.17559200525283813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,96,96,128,1,fp8,fp8,0,0.17602880001068116
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,96,1,128,1,float16,float16,0,0.06845279932022094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,96,1,128,1,float16,fp8,0,0.06488800048828125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,96,1,128,1,fp8,fp8,0,0.06488320231437683
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,96,2,128,1,float16,float16,0,0.06928640007972717
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,96,2,128,1,float16,fp8,0,0.06611999869346619
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,96,2,128,1,fp8,fp8,0,0.0658079981803894
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,96,4,128,1,float16,float16,0,0.06957280039787292
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,96,4,128,1,float16,fp8,0,0.06669279932975769
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,96,4,128,1,fp8,fp8,0,0.0664143979549408
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,96,8,128,1,float16,float16,0,0.0759663999080658
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,96,8,128,1,float16,fp8,0,0.07321119904518128
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,96,8,128,1,fp8,fp8,0,0.0739184021949768
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,96,96,128,1,float16,float16,0,0.09930880069732666
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,96,96,128,1,float16,fp8,0,0.0968176007270813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,96,96,128,1,fp8,fp8,0,0.09529280066490173
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,96,1,128,1,float16,float16,0,0.04007200002670288
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,96,1,128,1,float16,fp8,0,0.0418256014585495
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,96,1,128,1,fp8,fp8,0,0.041300800442695615
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,96,2,128,1,float16,float16,0,0.040212801098823546
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,96,2,128,1,float16,fp8,0,0.04089919924736023
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,96,2,128,1,fp8,fp8,0,0.04110719859600067
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,96,4,128,1,float16,float16,0,0.04026240110397339
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,96,4,128,1,float16,fp8,0,0.04126240015029907
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,96,4,128,1,fp8,fp8,0,0.04162879884243011
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,96,8,128,1,float16,float16,0,0.04126240015029907
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,96,8,128,1,float16,fp8,0,0.04264000058174133
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,96,8,128,1,fp8,fp8,0,0.0428272008895874
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,96,1,128,1,float16,float16,0,2.2263151168823243
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,96,1,128,1,float16,fp8,0,2.373886489868164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,96,1,128,1,fp8,fp8,0,2.370963287353516
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,96,2,128,1,float16,float16,0,2.303971290588379
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,96,2,128,1,float16,fp8,0,2.472443199157715
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,96,2,128,1,fp8,fp8,0,2.4459087371826174
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,96,4,128,1,float16,float16,0,2.457017517089844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,96,4,128,1,float16,fp8,0,2.589899253845215
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,96,4,128,1,fp8,fp8,0,2.5902271270751953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,96,8,128,1,float16,float16,0,2.754240036010742
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,96,1,128,1,float16,float16,0,1.1297807693481445
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,96,8,128,1,float16,fp8,0,2.8676048278808595
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,96,8,128,1,fp8,fp8,0,2.866921615600586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,96,1,128,1,float16,fp8,0,1.2005359649658203
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,96,1,128,1,fp8,fp8,0,1.2015456199645995
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,96,2,128,1,float16,float16,0,1.167619228363037
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,96,2,128,1,float16,fp8,0,1.2358176231384277
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,96,2,128,1,fp8,fp8,0,1.237065601348877
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,96,96,128,1,float16,float16,0,4.599153518676758
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,96,96,128,1,float16,fp8,0,4.540092849731446
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,96,4,128,1,float16,float16,0,1.2394911766052246
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,96,96,128,1,fp8,fp8,0,4.5440513610839846
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,96,4,128,1,float16,fp8,0,1.3075504302978516
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,96,4,128,1,fp8,fp8,0,1.308720016479492
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,96,8,128,1,float16,float16,0,1.3844191551208496
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,96,1,128,1,float16,float16,0,0.576475191116333
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,96,8,128,1,float16,fp8,0,1.4463232040405274
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,96,1,128,1,float16,fp8,0,0.6136767864227295
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,96,8,128,1,fp8,fp8,0,1.4495792388916016
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,96,1,128,1,fp8,fp8,0,0.6119167804718018
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,96,2,128,1,float16,float16,0,0.5955103874206543
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,96,96,128,1,float16,float16,0,2.311894416809082
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,96,96,128,1,fp8,fp8,0,2.284280014038086
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,96,2,128,1,float16,fp8,0,0.6323023796081543
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,96,96,128,1,float16,fp8,0,2.2823968887329102
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,96,2,128,1,fp8,fp8,0,0.63230881690979
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,96,4,128,1,float16,float16,0,0.6344272136688233
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,96,4,128,1,float16,fp8,0,0.666270399093628
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,96,4,128,1,fp8,fp8,0,0.6670479774475098
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,96,8,128,1,float16,float16,0,0.7078767776489258
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,96,8,128,1,float16,fp8,0,0.7365039825439453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,96,8,128,1,fp8,fp8,0,0.7361167907714844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,96,1,128,1,float16,float16,0,0.30066399574279784
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,96,96,128,1,float16,float16,0,1.1696208000183106
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,96,1,128,1,float16,fp8,0,0.3197279930114746
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,96,96,128,1,float16,fp8,0,1.1544591903686523
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,96,1,128,1,fp8,fp8,0,0.3193808078765869
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,96,96,128,1,fp8,fp8,0,1.1534111976623536
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,96,4,128,1,float16,fp8,0,0.34600000381469725
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,96,2,128,1,float16,float16,0,0.30919039249420166
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,96,2,128,1,float16,fp8,0,0.3276479959487915
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,96,2,128,1,fp8,fp8,0,0.32719519138336184
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,96,4,128,1,float16,float16,0,0.3276655912399292
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,96,4,128,1,fp8,fp8,0,0.346126389503479
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,96,8,128,1,float16,float16,0,0.36526401042938234
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,96,8,128,1,float16,fp8,0,0.3808880090713501
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,96,8,128,1,fp8,fp8,0,0.3791840076446533
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,96,96,128,1,float16,float16,0,0.5998032093048096
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,96,1,128,1,float16,float16,0,0.16112799644470216
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,96,96,128,1,float16,fp8,0,0.5891727924346923
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,96,96,128,1,fp8,fp8,0,0.5885791778564453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,96,1,128,1,float16,fp8,0,0.17112480401992797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,96,1,128,1,fp8,fp8,0,0.17044960260391234
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,96,2,128,1,float16,float16,0,0.16813600063323975
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,96,2,128,1,float16,fp8,0,0.17679359912872314
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,96,2,128,1,fp8,fp8,0,0.17693760395050048
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,96,4,128,1,float16,float16,0,0.17544480562210082
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,96,4,128,1,float16,fp8,0,0.18522080183029174
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,96,4,128,1,fp8,fp8,0,0.18470239639282227
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,96,8,128,1,float16,float16,0,0.19417760372161866
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,96,8,128,1,float16,fp8,0,0.2030400037765503
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,96,8,128,1,fp8,fp8,0,0.2022768020629883
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,96,96,128,1,float16,float16,0,0.3114160060882568
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,96,96,128,1,float16,fp8,0,0.3067055940628052
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,96,96,128,1,fp8,fp8,0,0.3063375949859619
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,96,1,128,1,float16,float16,0,0.09387519955635071
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,96,1,128,1,float16,fp8,0,0.09881119728088379
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,96,1,128,1,fp8,fp8,0,0.09926559925079345
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,96,2,128,1,float16,float16,0,0.09417920112609864
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,96,2,128,1,float16,fp8,0,0.09903039932250976
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,96,2,128,1,fp8,fp8,0,0.10006239414215087
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,96,4,128,1,float16,float16,0,0.09934560060501099
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,96,4,128,1,float16,fp8,0,0.10613919496536255
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,96,4,128,1,fp8,fp8,0,0.10591360330581664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,96,8,128,1,float16,float16,0,0.10883519649505616
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,96,8,128,1,float16,fp8,0,0.11315679550170898
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,96,8,128,1,fp8,fp8,0,0.1132383942604065
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,96,96,128,1,float16,float16,0,0.16847519874572753
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,96,96,128,1,float16,fp8,0,0.1648576021194458
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,96,96,128,1,fp8,fp8,0,0.16549760103225708
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,96,1,128,1,float16,float16,0,0.058847999572753905
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,96,1,128,1,float16,fp8,0,0.057548797130584715
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,96,1,128,1,fp8,fp8,0,0.058140802383422854
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,96,2,128,1,float16,float16,0,0.06005600094795227
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,96,2,128,1,float16,fp8,0,0.05868800282478333
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,96,2,128,1,fp8,fp8,0,0.058417600393295285
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,96,4,128,1,float16,float16,0,0.060812801122665405
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,96,4,128,1,float16,fp8,0,0.059280002117156984
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,96,4,128,1,fp8,fp8,0,0.059222400188446045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,96,8,128,1,float16,float16,0,0.06674240231513977
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,96,8,128,1,float16,fp8,0,0.0663424015045166
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,96,8,128,1,fp8,fp8,0,0.06608160138130188
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,96,96,128,1,float16,float16,0,0.09456959962844849
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,96,96,128,1,float16,fp8,0,0.09160640239715576
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,96,96,128,1,fp8,fp8,0,0.0910975992679596
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,96,1,128,1,float16,float16,0,0.035641598701477054
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,96,1,128,1,float16,fp8,0,0.037780800461769105
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,96,1,128,1,fp8,fp8,0,0.03739199936389923
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,96,2,128,1,float16,float16,0,0.035683199763298035
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,96,2,128,1,float16,fp8,0,0.037308800220489505
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,96,4,128,1,float16,float16,0,0.03630079925060272
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,96,2,128,1,fp8,fp8,0,0.03779839873313904
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,96,4,128,1,float16,fp8,0,0.038008001446723935
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,96,4,128,1,fp8,fp8,0,0.038099199533462524
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,96,8,128,1,float16,float16,0,0.03681280016899109
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,96,8,128,1,float16,fp8,0,0.03891200125217438
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,96,8,128,1,fp8,fp8,0,0.0384799987077713
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,96,96,128,1,float16,float16,0,0.05273600220680237
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,96,96,128,1,float16,fp8,0,0.05050240159034729
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,96,2,128,1,float16,fp8,0,0.02764959931373596
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,96,96,128,1,fp8,fp8,0,0.05010560154914856
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,96,1,128,1,float16,float16,0,0.027587199211120607
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,96,1,128,1,float16,fp8,0,0.027369600534439088
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,96,1,128,1,fp8,fp8,0,0.027195200324058533
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,96,2,128,1,float16,float16,0,0.027608001232147218
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,96,2,128,1,fp8,fp8,0,0.02739199995994568
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,96,4,128,1,float16,float16,0,0.028036800026893616
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,96,4,128,1,float16,fp8,0,0.027611199021339416
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,96,4,128,1,fp8,fp8,0,0.027758398652076723
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,96,8,128,1,float16,float16,0,0.02815839946269989
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,96,8,128,1,float16,fp8,0,0.02794719934463501
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,96,1,128,1,float16,float16,0,1.0379823684692382
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,96,8,128,1,fp8,fp8,0,0.027774399518966673
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,96,1,128,1,float16,fp8,0,1.143819236755371
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,96,1,128,1,fp8,fp8,0,1.1338224411010742
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,96,2,128,1,float16,float16,0,1.0714799880981445
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,96,2,128,1,float16,fp8,0,1.180339241027832
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,96,2,128,1,fp8,fp8,0,1.1698368072509766
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,96,4,128,1,float16,float16,0,1.1481200218200684
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,96,4,128,1,float16,fp8,0,1.2391599655151366
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,96,4,128,1,fp8,fp8,0,1.2375568389892577
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,96,8,128,1,float16,float16,0,1.2979663848876952
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,96,8,128,1,float16,fp8,0,1.3940464019775392
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,96,8,128,1,fp8,fp8,0,1.3844847679138184
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,96,1,128,1,float16,float16,0,0.5290944099426269
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,96,1,128,1,float16,fp8,0,0.5770847797393799
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,96,1,128,1,fp8,fp8,0,0.5821023941040039
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,96,96,128,1,float16,float16,0,2.256500816345215
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,96,2,128,1,float16,float16,0,0.5476111888885498
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,96,96,128,1,float16,fp8,0,2.277302360534668
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,96,96,128,1,fp8,fp8,0,2.277241516113281
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,96,2,128,1,float16,fp8,0,0.5936448097229003
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,96,2,128,1,fp8,fp8,0,0.5953104019165039
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,96,4,128,1,float16,float16,0,0.5837423801422119
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,96,4,128,1,float16,fp8,0,0.6357327938079834
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,96,4,128,1,fp8,fp8,0,0.6357264041900634
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,96,8,128,1,float16,float16,0,0.6573520183563233
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,96,96,128,1,float16,float16,0,1.1422127723693847
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,96,8,128,1,float16,fp8,0,0.7059840202331543
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,96,8,128,1,fp8,fp8,0,0.7064832210540771
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,96,1,128,1,float16,float16,0,0.2787359952926636
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,96,96,128,1,float16,fp8,0,1.149841594696045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,96,1,128,1,float16,fp8,0,0.30440640449523926
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,96,96,128,1,fp8,fp8,0,1.1500351905822754
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,96,1,128,1,fp8,fp8,0,0.30103359222412107
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,96,2,128,1,float16,float16,0,0.28595681190490724
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,96,2,128,1,float16,fp8,0,0.31082561016082766
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,96,2,128,1,fp8,fp8,0,0.31261119842529295
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,96,4,128,1,float16,float16,0,0.3050607919692993
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,96,4,128,1,float16,fp8,0,0.3303647994995117
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,96,4,128,1,fp8,fp8,0,0.3290431976318359
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,96,8,128,1,float16,float16,0,0.3384671926498413
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,96,8,128,1,float16,fp8,0,0.36475200653076173
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,96,8,128,1,fp8,fp8,0,0.36521599292755125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,96,96,128,1,float16,float16,0,0.5839776039123535
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,96,96,128,1,float16,fp8,0,0.5873311996459961
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,96,96,128,1,fp8,fp8,0,0.5881728172302246
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,96,1,128,1,float16,float16,0,0.15047999620437622
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,96,1,128,1,float16,fp8,0,0.1624959945678711
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,96,1,128,1,fp8,fp8,0,0.16278400421142578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,96,2,128,1,float16,float16,0,0.15416959524154664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,96,2,128,1,float16,fp8,0,0.17003680467605592
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,96,2,128,1,fp8,fp8,0,0.1674288034439087
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,96,4,128,1,float16,float16,0,0.16368800401687622
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,96,4,128,1,float16,fp8,0,0.17550400495529175
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,96,4,128,1,fp8,fp8,0,0.17522720098495484
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,96,8,128,1,float16,float16,0,0.18272800445556642
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,96,8,128,1,float16,fp8,0,0.1943120002746582
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,96,8,128,1,fp8,fp8,0,0.19472320079803468
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,96,96,128,1,float16,float16,0,0.3060703992843628
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,96,96,128,1,float16,fp8,0,0.30347518920898436
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,96,96,128,1,fp8,fp8,0,0.3040848016738892
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,96,1,128,1,float16,float16,0,0.08864319920539857
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,96,1,128,1,float16,fp8,0,0.0934112012386322
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,96,1,128,1,fp8,fp8,0,0.09372640252113343
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,96,2,128,1,float16,float16,0,0.08889920115470887
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,96,2,128,1,float16,fp8,0,0.09374880194664001
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,96,2,128,1,fp8,fp8,0,0.09423999786376953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,96,4,128,1,float16,float16,0,0.09468479752540589
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,96,4,128,1,float16,fp8,0,0.1003424048423767
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,96,4,128,1,fp8,fp8,0,0.09994239807128906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,96,8,128,1,float16,float16,0,0.10226080417633057
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,96,8,128,1,float16,fp8,0,0.10750720500946045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,96,8,128,1,fp8,fp8,0,0.10748640298843384
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,96,96,128,1,float16,float16,0,0.1644544005393982
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,96,96,128,1,float16,fp8,0,0.16072479486465455
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,96,96,128,1,fp8,fp8,0,0.16046080589294434
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,96,1,128,1,float16,float16,0,0.05572959780693054
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,96,1,128,1,float16,fp8,0,0.05276640057563782
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,96,1,128,1,fp8,fp8,0,0.05373119711875916
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,96,2,128,1,float16,float16,0,0.056201601028442384
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,96,2,128,1,float16,fp8,0,0.05351679921150208
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,96,2,128,1,fp8,fp8,0,0.05411520004272461
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,96,4,128,1,float16,float16,0,0.056497597694396974
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,96,4,128,1,float16,fp8,0,0.054529601335525514
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,96,4,128,1,fp8,fp8,0,0.05477759838104248
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,96,8,128,1,float16,float16,0,0.062491202354431154
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,96,8,128,1,float16,fp8,0,0.062174397706985476
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,96,8,128,1,fp8,fp8,0,0.062161600589752196
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,96,96,128,1,float16,float16,0,0.09247040152549743
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,96,96,128,1,float16,fp8,0,0.0851472020149231
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,96,96,128,1,fp8,fp8,0,0.08596959710121155
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,96,1,128,1,float16,float16,0,0.03223839998245239
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,96,1,128,1,float16,fp8,0,0.03405919969081879
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,96,1,128,1,fp8,fp8,0,0.03407360017299652
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,96,2,128,1,float16,float16,0,0.03253119885921478
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,96,2,128,1,float16,fp8,0,0.03409120142459869
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,96,2,128,1,fp8,fp8,0,0.034113600850105286
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,96,4,128,1,float16,float16,0,0.03265120089054108
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,96,4,128,1,float16,fp8,0,0.03419199883937836
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,96,4,128,1,fp8,fp8,0,0.033852800726890564
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,96,8,128,1,float16,float16,0,0.03479839861392975
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,96,8,128,1,float16,fp8,0,0.03515360057353974
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,96,8,128,1,fp8,fp8,0,0.03518239855766296
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,96,96,128,1,float16,float16,0,0.05046399831771851
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,96,2,128,1,float16,float16,0,0.026208001375198364
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,96,96,128,1,float16,fp8,0,0.04772480130195618
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,96,96,128,1,fp8,fp8,0,0.047788798809051514
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,96,1,128,1,float16,float16,0,0.02605920135974884
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,96,1,128,1,float16,fp8,0,0.027027198672294618
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,96,1,128,1,fp8,fp8,0,0.02735520005226135
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,96,2,128,1,float16,fp8,0,0.027457600831985472
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,96,2,128,1,fp8,fp8,0,0.02747359871864319
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,96,4,128,1,float16,float16,0,0.026337599754333495
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,96,4,128,1,float16,fp8,0,0.02733440101146698
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,96,4,128,1,fp8,fp8,0,0.027532801032066345
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,96,8,128,1,float16,float16,0,0.026812800765037538
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,96,8,128,1,float16,fp8,0,0.027484801411628724
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,96,8,128,1,fp8,fp8,0,0.02754879891872406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,96,96,128,1,float16,float16,0,0.030632001161575318
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,96,96,128,1,float16,fp8,0,0.031249600648880004
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,96,96,128,1,fp8,fp8,0,0.03105120062828064
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,96,1,128,1,float16,float16,0,0.020656000077724456
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,96,1,128,1,float16,fp8,0,0.021564799547195434
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,96,1,128,1,fp8,fp8,0,0.021249599754810333
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,96,2,128,1,float16,float16,0,0.020905600488185884
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,96,2,128,1,float16,fp8,0,0.021303999423980712
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,96,2,128,1,fp8,fp8,0,0.021620799601078034
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,96,4,128,1,float16,float16,0,0.02098879963159561
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,96,4,128,1,float16,fp8,0,0.021686400473117828
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,96,4,128,1,fp8,fp8,0,0.021966400742530822
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,96,8,128,1,float16,float16,0,0.021422399580478667
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,96,8,128,1,float16,fp8,0,0.02173440009355545
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,96,1,128,1,float16,fp8,0,0.5761712074279786
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,96,8,128,1,fp8,fp8,0,0.021748800575733186
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,96,1,128,1,float16,float16,0,0.5355728149414063
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,96,1,128,1,fp8,fp8,0,0.5754208087921142
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,96,2,128,1,float16,float16,0,0.5495344161987304
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,96,2,128,1,float16,fp8,0,0.5967743873596192
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,96,2,128,1,fp8,fp8,0,0.5995120048522949
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,96,4,128,1,float16,float16,0,0.5844639778137207
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,96,4,128,1,float16,fp8,0,0.6296319961547852
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,96,4,128,1,fp8,fp8,0,0.6332128047943115
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,96,8,128,1,float16,float16,0,0.6556352138519287
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,96,8,128,1,float16,fp8,0,0.7048848152160645
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,96,8,128,1,fp8,fp8,0,0.7045072078704834
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,96,1,128,1,float16,float16,0,0.2794496059417725
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,96,96,128,1,float16,float16,0,1.3182512283325196
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,96,1,128,1,float16,fp8,0,0.30187358856201174
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,96,1,128,1,fp8,fp8,0,0.3017983913421631
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,96,96,128,1,float16,fp8,0,1.3574576377868652
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,96,96,128,1,fp8,fp8,0,1.3547120094299316
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,96,2,128,1,float16,float16,0,0.2868880033493042
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,96,2,128,1,float16,fp8,0,0.31197919845581057
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,96,4,128,1,float16,float16,0,0.30375840663909914
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,96,2,128,1,fp8,fp8,0,0.31203839778900144
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,96,4,128,1,float16,fp8,0,0.32563199996948244
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,96,4,128,1,fp8,fp8,0,0.32692320346832277
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,96,8,128,1,float16,float16,0,0.3395711898803711
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,96,1,128,1,float16,float16,0,0.1491312026977539
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,96,8,128,1,float16,fp8,0,0.36375999450683594
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,96,8,128,1,fp8,fp8,0,0.3647471904754639
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,96,96,128,1,float16,float16,0,0.6732367992401123
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,96,96,128,1,float16,fp8,0,0.6902431964874267
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,96,1,128,1,float16,fp8,0,0.16281919479370116
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,96,96,128,1,fp8,fp8,0,0.6896512031555175
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,96,1,128,1,fp8,fp8,0,0.16295679807662963
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,96,2,128,1,float16,float16,0,0.15568480491638184
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,96,2,128,1,float16,fp8,0,0.16743520498275757
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,96,2,128,1,fp8,fp8,0,0.1692911982536316
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,96,4,128,1,float16,float16,0,0.1650607943534851
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,96,4,128,1,float16,fp8,0,0.17317279577255248
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,96,4,128,1,fp8,fp8,0,0.1764847993850708
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,96,8,128,1,float16,float16,0,0.1831071972846985
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,96,8,128,1,float16,fp8,0,0.1952064037322998
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,96,1,128,1,float16,fp8,0,0.09339839816093445
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,96,8,128,1,fp8,fp8,0,0.19412959814071656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,96,96,128,1,float16,float16,0,0.34804320335388184
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,96,96,128,1,float16,fp8,0,0.3579312086105347
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,96,96,128,1,fp8,fp8,0,0.35708160400390626
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,96,1,128,1,float16,float16,0,0.08788319826126098
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,96,1,128,1,fp8,fp8,0,0.0940559983253479
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,96,2,128,1,float16,float16,0,0.08926560282707215
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,96,2,128,1,float16,fp8,0,0.09452959895133972
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,96,2,128,1,fp8,fp8,0,0.09390400052070617
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,96,4,128,1,float16,float16,0,0.09433280229568482
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,96,4,128,1,float16,fp8,0,0.09972479939460754
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,96,4,128,1,fp8,fp8,0,0.10029120445251465
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,96,8,128,1,float16,float16,0,0.10183680057525635
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,96,8,128,1,float16,fp8,0,0.10823040008544922
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,96,8,128,1,fp8,fp8,0,0.10789120197296143
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,96,96,128,1,float16,float16,0,0.18711999654769898
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,96,96,128,1,float16,fp8,0,0.18810720443725587
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,96,96,128,1,fp8,fp8,0,0.18765439987182617
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,96,1,128,1,float16,float16,0,0.05529599785804749
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,96,1,128,1,float16,fp8,0,0.05193120241165161
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,96,1,128,1,fp8,fp8,0,0.0530784010887146
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,96,2,128,1,float16,float16,0,0.05569599866867066
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,96,2,128,1,float16,fp8,0,0.05321279764175415
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,96,2,128,1,fp8,fp8,0,0.053427201509475705
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,96,4,128,1,float16,float16,0,0.056694400310516355
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,96,4,128,1,float16,fp8,0,0.054020798206329344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,96,4,128,1,fp8,fp8,0,0.054307198524475096
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,96,8,128,1,float16,float16,0,0.06291679739952087
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,96,8,128,1,float16,fp8,0,0.06153280138969421
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,96,8,128,1,fp8,fp8,0,0.06043199896812439
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,96,96,128,1,float16,float16,0,0.1046064019203186
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,96,96,128,1,float16,fp8,0,0.09953119754791259
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,96,96,128,1,fp8,fp8,0,0.09964320063591003
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,96,1,128,1,float16,float16,0,0.03187519907951355
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,96,1,128,1,float16,fp8,0,0.03394559919834137
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,96,1,128,1,fp8,fp8,0,0.03409439921379089
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,96,2,128,1,float16,float16,0,0.032201600074768064
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,96,2,128,1,float16,fp8,0,0.0338016003370285
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,96,2,128,1,fp8,fp8,0,0.034088000655174255
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,96,4,128,1,float16,float16,0,0.0324864000082016
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,96,4,128,1,float16,fp8,0,0.034227201342582704
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,96,4,128,1,fp8,fp8,0,0.03389439880847931
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,96,8,128,1,float16,float16,0,0.033416000008583066
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,96,8,128,1,float16,fp8,0,0.035776001214981076
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,96,8,128,1,fp8,fp8,0,0.03520320057868957
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,96,96,128,1,float16,float16,0,0.05414559841156006
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,96,96,128,1,float16,fp8,0,0.05223360061645508
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,96,96,128,1,fp8,fp8,0,0.05187360048294067
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,96,1,128,1,float16,float16,0,0.025960001349449157
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,96,1,128,1,float16,fp8,0,0.02694559991359711
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,96,1,128,1,fp8,fp8,0,0.02722879946231842
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,96,2,128,1,float16,float16,0,0.026126399636268616
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,96,2,128,1,float16,fp8,0,0.027422401309013366
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,96,2,128,1,fp8,fp8,0,0.02735840082168579
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,96,4,128,1,float16,float16,0,0.026241600513458252
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,96,4,128,1,float16,fp8,0,0.027241599559783936
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,96,4,128,1,fp8,fp8,0,0.027452799677848815
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,96,8,128,1,float16,float16,0,0.026182401180267333
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,96,8,128,1,float16,fp8,0,0.0275983989238739
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,96,8,128,1,fp8,fp8,0,0.027564799785614012
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,96,96,128,1,float16,float16,0,0.034046399593353274
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,96,96,128,1,float16,fp8,0,0.035580798983573914
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,96,96,128,1,fp8,fp8,0,0.035252800583839415
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,96,1,128,1,float16,float16,0,0.020683200657367708
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,96,1,128,1,float16,fp8,0,0.02131360024213791
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,96,1,128,1,fp8,fp8,0,0.021798400580883025
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,96,2,128,1,float16,float16,0,0.02082560062408447
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,96,2,128,1,float16,fp8,0,0.02167679965496063
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,96,2,128,1,fp8,fp8,0,0.021593600511550903
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,96,4,128,1,float16,float16,0,0.02099999934434891
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,96,4,128,1,float16,fp8,0,0.02147520035505295
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,96,4,128,1,fp8,fp8,0,0.021976000070571898
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,96,8,128,1,float16,float16,0,0.021193599700927733
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,96,8,128,1,float16,fp8,0,0.022014400362968443
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,96,8,128,1,fp8,fp8,0,0.021814399957656862
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,96,96,128,1,float16,float16,0,0.0235615998506546
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,96,96,128,1,float16,fp8,0,0.024633599817752837
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,96,96,128,1,fp8,fp8,0,0.024292799830436706
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,96,1,128,1,float16,float16,0,0.020127999782562255
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,96,1,128,1,float16,fp8,0,0.021139200031757354
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,96,1,128,1,fp8,fp8,0,0.02094399929046631
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,96,2,128,1,float16,float16,0,0.020452800393104553
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,96,2,128,1,float16,fp8,0,0.021278400719165803
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,96,4,128,1,float16,float16,0,0.020313599705696107
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,96,2,128,1,fp8,fp8,0,0.02112800031900406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,96,4,128,1,float16,fp8,0,0.021372799575328828
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,96,4,128,1,fp8,fp8,0,0.021403199434280394
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,96,8,128,1,float16,float16,0,0.020545600354671477
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,96,8,128,1,float16,fp8,0,0.021352000534534454
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,96,8,128,1,fp8,fp8,0,0.02125920057296753
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,96,1,128,1,float16,float16,0,0.27778561115264894
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,96,1,128,1,float16,fp8,0,0.30531361103057864
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,96,4,128,1,float16,float16,0,0.3040800094604492
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,96,1,128,1,fp8,fp8,0,0.30280001163482667
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,96,2,128,1,float16,float16,0,0.2857232093811035
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,96,2,128,1,float16,fp8,0,0.31068480014801025
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,96,2,128,1,fp8,fp8,0,0.31317598819732667
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,96,8,128,1,fp8,fp8,0,0.3645296096801758
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,96,4,128,1,float16,fp8,0,0.3290015935897827
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,96,4,128,1,fp8,fp8,0,0.32887840270996094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,96,8,128,1,float16,float16,0,0.3409775972366333
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,96,8,128,1,float16,fp8,0,0.36722400188446047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,96,1,128,1,float16,float16,0,0.15005600452423096
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,96,96,128,1,float16,float16,0,0.8519760131835937
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,96,1,128,1,float16,fp8,0,0.16341279745101928
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,96,96,128,1,float16,fp8,0,0.9015664100646973
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,96,1,128,1,fp8,fp8,0,0.1638208031654358
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,96,96,128,1,fp8,fp8,0,0.9007087707519531
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,96,2,128,1,float16,float16,0,0.1575744032859802
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,96,2,128,1,float16,fp8,0,0.1705471992492676
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,96,2,128,1,fp8,fp8,0,0.1685920000076294
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,96,4,128,1,float16,float16,0,0.16196800470352174
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,96,4,128,1,float16,fp8,0,0.17623519897460938
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,96,4,128,1,fp8,fp8,0,0.17206079959869386
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,96,8,128,1,float16,float16,0,0.1824944019317627
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,96,8,128,1,float16,fp8,0,0.19602880477905274
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,96,8,128,1,fp8,fp8,0,0.19568480253219606
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,96,96,128,1,float16,float16,0,0.44010558128356936
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,96,2,128,1,float16,float16,0,0.09073920249938965
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,96,96,128,1,float16,fp8,0,0.4614416122436523
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,96,1,128,1,float16,float16,0,0.08884959816932678
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,96,96,128,1,fp8,fp8,0,0.46384482383728026
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,96,1,128,1,float16,fp8,0,0.09467359781265258
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,96,1,128,1,fp8,fp8,0,0.09442560076713562
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,96,2,128,1,float16,fp8,0,0.09544640183448791
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,96,2,128,1,fp8,fp8,0,0.09533920288085937
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,96,4,128,1,float16,float16,0,0.09656320214271545
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,96,4,128,1,float16,fp8,0,0.10046080350875855
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,96,4,128,1,fp8,fp8,0,0.10065920352935791
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,96,8,128,1,float16,float16,0,0.10396959781646728
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,96,8,128,1,float16,fp8,0,0.10885599851608277
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,96,8,128,1,fp8,fp8,0,0.10885119438171387
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,96,96,128,1,float16,float16,0,0.23280320167541504
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,96,96,128,1,float16,fp8,0,0.244215989112854
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,96,96,128,1,fp8,fp8,0,0.2437903881072998
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,96,1,128,1,float16,float16,0,0.055692797899246214
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,96,1,128,1,float16,fp8,0,0.05335680246353149
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,96,1,128,1,fp8,fp8,0,0.054124802350997925
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,96,2,128,1,float16,float16,0,0.055795198678970336
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,96,2,128,1,float16,fp8,0,0.053758400678634646
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,96,2,128,1,fp8,fp8,0,0.054054397344589236
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,96,4,128,1,float16,float16,0,0.057175999879837035
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,96,4,128,1,float16,fp8,0,0.05469599962234497
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,96,4,128,1,fp8,fp8,0,0.054092800617218016
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,96,8,128,1,float16,float16,0,0.06304640173912049
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,96,8,128,1,float16,fp8,0,0.06076480150222778
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,96,8,128,1,fp8,fp8,0,0.06128000020980835
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,96,96,128,1,float16,float16,0,0.12634079456329345
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,96,96,128,1,float16,fp8,0,0.12753280401229858
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,96,96,128,1,fp8,fp8,0,0.12745280265808107
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,96,1,128,1,float16,float16,0,0.031769600510597226
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,96,1,128,1,float16,fp8,0,0.03385440111160278
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,96,1,128,1,fp8,fp8,0,0.034088000655174255
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,96,2,128,1,float16,float16,0,0.03207519948482514
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,96,2,128,1,float16,fp8,0,0.03399679958820343
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,96,2,128,1,fp8,fp8,0,0.033990401029586795
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,96,4,128,1,float16,float16,0,0.03249599933624268
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,96,4,128,1,float16,fp8,0,0.03400959968566895
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,96,4,128,1,fp8,fp8,0,0.03397440016269684
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,96,96,128,1,float16,fp8,0,0.06595199704170226
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,96,8,128,1,float16,float16,0,0.03341920077800751
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,96,8,128,1,float16,fp8,0,0.034990400075912476
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,96,8,128,1,fp8,fp8,0,0.03492000102996826
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,96,96,128,1,float16,float16,0,0.06486240029335022
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,96,96,128,1,fp8,fp8,0,0.06576480269432068
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,96,1,128,1,float16,float16,0,0.025940799713134767
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,96,1,128,1,float16,fp8,0,0.0273391991853714
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,96,1,128,1,fp8,fp8,0,0.02744640111923218
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,96,2,128,1,float16,float16,0,0.026086398959159852
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,96,2,128,1,float16,fp8,0,0.0274944007396698
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,96,2,128,1,fp8,fp8,0,0.02731040120124817
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,96,4,128,1,float16,float16,0,0.026134398579597474
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,96,4,128,1,float16,fp8,0,0.027654400467872618
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,96,4,128,1,fp8,fp8,0,0.027556800842285158
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,96,8,128,1,float16,float16,0,0.026449599862098695
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,96,8,128,1,float16,fp8,0,0.02792479991912842
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,96,8,128,1,fp8,fp8,0,0.027726399898529052
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,96,96,128,1,float16,float16,0,0.037564799189567566
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,96,96,128,1,float16,fp8,0,0.03967039883136749
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,96,96,128,1,fp8,fp8,0,0.03985599875450134
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,96,1,128,1,float16,float16,0,0.020814399421215057
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,96,1,128,1,float16,fp8,0,0.021353599429130555
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,96,1,128,1,fp8,fp8,0,0.02155199944972992
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,96,2,128,1,float16,float16,0,0.02057439982891083
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,96,2,128,1,float16,fp8,0,0.02144480049610138
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,96,2,128,1,fp8,fp8,0,0.021566399931907655
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,96,4,128,1,float16,float16,0,0.021140800416469575
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,96,4,128,1,float16,fp8,0,0.02184319943189621
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,96,4,128,1,fp8,fp8,0,0.021958400309085847
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,96,8,128,1,float16,float16,0,0.021241599321365358
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,96,8,128,1,float16,fp8,0,0.021822400391101837
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,96,8,128,1,fp8,fp8,0,0.022060799598693847
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,96,96,128,1,float16,float16,0,0.027286401391029357
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,96,96,128,1,float16,fp8,0,0.028600001335144044
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,96,96,128,1,fp8,fp8,0,0.02882080078125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,96,1,128,1,float16,float16,0,0.0200655996799469
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,96,1,128,1,float16,fp8,0,0.021006399393081666
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,96,1,128,1,fp8,fp8,0,0.021115200221538545
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,96,2,128,1,float16,float16,0,0.020206399261951447
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,96,2,128,1,float16,fp8,0,0.021030400693416596
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,96,2,128,1,fp8,fp8,0,0.02099840044975281
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,96,4,128,1,float16,float16,0,0.020291200280189513
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,96,4,128,1,float16,fp8,0,0.021480000019073485
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,96,4,128,1,fp8,fp8,0,0.020916800200939178
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,96,8,128,1,float16,float16,0,0.02059520035982132
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,96,8,128,1,float16,fp8,0,0.021423999965190888
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,96,8,128,1,fp8,fp8,0,0.02112800031900406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,96,96,128,1,float16,float16,0,0.021476800739765167
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,96,96,128,1,float16,fp8,0,0.021729600429534913
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,96,96,128,1,fp8,fp8,0,0.02194399982690811
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,96,1,128,1,float16,float16,0,0.019808000326156615
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,96,1,128,1,float16,fp8,0,0.020467199385166168
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,96,1,128,1,fp8,fp8,0,0.020425599813461304
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,96,2,128,1,float16,float16,0,0.020017600059509276
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,96,2,128,1,float16,fp8,0,0.020390400290489198
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,96,2,128,1,fp8,fp8,0,0.020692799985408784
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,96,4,128,1,float16,float16,0,0.019995200634002685
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,96,4,128,1,float16,fp8,0,0.020953600108623505
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,96,4,128,1,fp8,fp8,0,0.02085919976234436
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,96,8,128,1,float16,float16,0,0.020187200605869295
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,96,2,128,1,float16,float16,0,0.15765119791030885
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,96,8,128,1,float16,fp8,0,0.0209647998213768
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,96,8,128,1,fp8,fp8,0,0.02131039947271347
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,96,1,128,1,float16,float16,0,0.14929920434951782
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,96,1,128,1,float16,fp8,0,0.1644063949584961
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,96,1,128,1,fp8,fp8,0,0.1655743956565857
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,96,2,128,1,float16,fp8,0,0.17150720357894897
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,96,2,128,1,fp8,fp8,0,0.1711680054664612
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,96,4,128,1,float16,float16,0,0.16534719467163086
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,96,4,128,1,float16,fp8,0,0.17765120267868043
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,96,4,128,1,fp8,fp8,0,0.17740479707717896
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,96,96,128,1,float16,float16,0,0.6256095886230468
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,96,8,128,1,float16,float16,0,0.2089695930480957
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,96,8,128,1,float16,fp8,0,0.22500159740447997
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,96,8,128,1,fp8,fp8,0,0.2261728048324585
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,96,1,128,1,float16,float16,0,0.08810719847679138
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,96,96,128,1,float16,fp8,0,0.687886381149292
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,96,1,128,1,float16,fp8,0,0.09449440240859985
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,96,96,128,1,fp8,fp8,0,0.688049602508545
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,96,1,128,1,fp8,fp8,0,0.09503999948501587
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,96,2,128,1,float16,float16,0,0.08851199746131896
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,96,2,128,1,float16,fp8,0,0.09560160040855407
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,96,2,128,1,fp8,fp8,0,0.09575679898262024
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,96,8,128,1,fp8,fp8,0,0.12503679990768432
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,96,4,128,1,float16,float16,0,0.09513919949531555
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,96,4,128,1,float16,fp8,0,0.10139679908752441
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,96,4,128,1,fp8,fp8,0,0.10093120336532593
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,96,8,128,1,float16,float16,0,0.11650079488754272
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,96,8,128,1,float16,fp8,0,0.12563999891281127
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,96,96,128,1,float16,float16,0,0.3255728006362915
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,96,1,128,1,float16,float16,0,0.055979198217391966
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,96,96,128,1,float16,fp8,0,0.3566783905029297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,96,96,128,1,fp8,fp8,0,0.3547359943389893
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,96,1,128,1,float16,fp8,0,0.05355200171470642
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,96,1,128,1,fp8,fp8,0,0.05415840148925781
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,96,2,128,1,float16,float16,0,0.0563152015209198
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,96,2,128,1,float16,fp8,0,0.05351200103759766
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,96,2,128,1,fp8,fp8,0,0.05390560030937195
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,96,4,128,1,float16,float16,0,0.05797920227050781
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,96,4,128,1,float16,fp8,0,0.055103999376296994
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,96,4,128,1,fp8,fp8,0,0.05548160076141358
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,96,8,128,1,float16,float16,0,0.07063999772071838
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,96,8,128,1,float16,fp8,0,0.06998080015182495
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,96,8,128,1,fp8,fp8,0,0.07052479982376099
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,96,96,128,1,float16,float16,0,0.17329920530319215
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,96,96,128,1,float16,fp8,0,0.18551360368728637
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,96,96,128,1,fp8,fp8,0,0.18299360275268556
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,96,2,128,1,fp8,fp8,0,0.03442879915237427
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,96,1,128,1,float16,float16,0,0.03240160048007965
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,96,1,128,1,float16,fp8,0,0.034236800670623777
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,96,1,128,1,fp8,fp8,0,0.034318399429321286
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,96,2,128,1,float16,float16,0,0.032206401228904724
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,96,2,128,1,float16,fp8,0,0.03433600068092346
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,96,4,128,1,float16,float16,0,0.03287039995193482
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,96,4,128,1,float16,fp8,0,0.034815999865531924
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,96,4,128,1,fp8,fp8,0,0.03456799983978272
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,96,8,128,1,float16,float16,0,0.03680799901485443
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,96,8,128,1,float16,fp8,0,0.039959999918937686
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,96,8,128,1,fp8,fp8,0,0.03986560106277466
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,96,96,128,1,float16,float16,0,0.08789759874343872
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,96,96,128,1,float16,fp8,0,0.0929040014743805
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,96,96,128,1,fp8,fp8,0,0.09355520009994507
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,96,1,128,1,float16,float16,0,0.026001599431037904
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,96,1,128,1,float16,fp8,0,0.027430400252342224
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,96,1,128,1,fp8,fp8,0,0.027142399549484254
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,96,2,128,1,float16,float16,0,0.026070401072502136
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,96,2,128,1,float16,fp8,0,0.027584001421928406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,96,2,128,1,fp8,fp8,0,0.027515199780464173
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,96,4,128,1,float16,float16,0,0.02598879933357239
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,96,4,128,1,float16,fp8,0,0.027611199021339416
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,96,4,128,1,fp8,fp8,0,0.027508801221847533
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,96,8,128,1,float16,float16,0,0.026041600108146667
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,96,8,128,1,float16,fp8,0,0.02774080038070679
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,96,8,128,1,fp8,fp8,0,0.02778880000114441
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,96,96,128,1,float16,float16,0,0.04877760112285614
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,96,96,128,1,float16,fp8,0,0.05354560017585754
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,96,96,128,1,fp8,fp8,0,0.053692799806594846
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,96,1,128,1,float16,float16,0,0.02074880003929138
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,96,1,128,1,float16,fp8,0,0.02181600034236908
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,96,1,128,1,fp8,fp8,0,0.021275199949741364
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,96,2,128,1,float16,float16,0,0.020819200575351714
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,96,2,128,1,float16,fp8,0,0.02181279957294464
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,96,2,128,1,fp8,fp8,0,0.021347199380397797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,96,4,128,1,float16,float16,0,0.02091040015220642
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,96,4,128,1,float16,fp8,0,0.022009600698947907
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,96,4,128,1,fp8,fp8,0,0.022036799788475038
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,96,8,128,1,float16,float16,0,0.02109439969062805
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,96,8,128,1,float16,fp8,0,0.021814399957656862
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,96,8,128,1,fp8,fp8,0,0.02189439982175827
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,96,96,128,1,float16,float16,0,0.031385600566864014
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,96,96,128,1,float16,fp8,0,0.033292800188064575
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,96,96,128,1,fp8,fp8,0,0.03334240019321442
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,96,1,128,1,float16,float16,0,0.02024320065975189
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,96,1,128,1,float16,fp8,0,0.021270400285720824
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,96,1,128,1,fp8,fp8,0,0.020942400395870208
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,96,2,128,1,float16,float16,0,0.0202224001288414
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,96,2,128,1,float16,fp8,0,0.02098879963159561
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,96,2,128,1,fp8,fp8,0,0.02144159972667694
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,96,4,128,1,float16,float16,0,0.020398400723934174
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,96,4,128,1,float16,fp8,0,0.021110400557518005
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,96,4,128,1,fp8,fp8,0,0.021279999613761903
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,96,1,128,1,float16,float16,0,0.01979999989271164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,96,8,128,1,float16,float16,0,0.020584000647068022
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,96,8,128,1,float16,fp8,0,0.021243199706077576
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,96,8,128,1,fp8,fp8,0,0.021264000236988066
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,96,96,128,1,float16,float16,0,0.024833600223064422
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,96,96,128,1,float16,fp8,0,0.02661919891834259
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,96,96,128,1,fp8,fp8,0,0.02619360089302063
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,96,1,128,1,float16,fp8,0,0.020529599487781526
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,96,1,128,1,fp8,fp8,0,0.020638400316238405
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,96,2,128,1,float16,float16,0,0.020054399967193604
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,96,2,128,1,float16,fp8,0,0.020873600244522096
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,96,2,128,1,fp8,fp8,0,0.020579199492931365
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,96,4,128,1,float16,float16,0,0.02011200040578842
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,96,4,128,1,float16,fp8,0,0.02096640020608902
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,96,4,128,1,fp8,fp8,0,0.02083359956741333
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,96,8,128,1,float16,float16,0,0.02008959949016571
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,96,8,128,1,float16,fp8,0,0.02128159999847412
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,96,8,128,1,fp8,fp8,0,0.021321600675582884
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,96,96,128,1,float16,float16,0,0.020628799498081208
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,96,96,128,1,float16,fp8,0,0.02125120013952255
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,96,96,128,1,fp8,fp8,0,0.02173759937286377
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,96,1,128,1,float16,float16,0,0.019516800343990327
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,96,1,128,1,float16,fp8,0,0.0203247994184494
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,96,1,128,1,fp8,fp8,0,0.020151999592781068
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,96,2,128,1,float16,float16,0,0.01953279972076416
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,96,2,128,1,float16,fp8,0,0.020628799498081208
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,96,2,128,1,fp8,fp8,0,0.02044160068035126
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,96,4,128,1,float16,float16,0,0.01964000016450882
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,96,4,128,1,float16,fp8,0,0.020603199303150178
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,96,4,128,1,fp8,fp8,0,0.020654399693012238
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,96,8,128,1,float16,float16,0,0.0199072003364563
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,96,8,128,1,float16,fp8,0,0.020895999670028687
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,96,8,128,1,fp8,fp8,0,0.020428800582885744
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,96,1,128,1,float16,float16,0,0.03019680082798004
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,96,1,128,1,float16,fp8,0,0.03169119954109192
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,96,1,128,1,fp8,fp8,0,0.03175199925899506
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,96,2,128,1,float16,float16,0,0.03728159964084625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,96,2,128,1,float16,fp8,0,0.040524798631668094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,96,2,128,1,fp8,fp8,0,0.04055840075016022
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,96,8,128,1,float16,fp8,0,0.09413759708404541
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,96,4,128,1,float16,float16,0,0.051920002698898314
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,96,4,128,1,float16,fp8,0,0.058430397510528566
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,96,4,128,1,fp8,fp8,0,0.05842080116271973
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,96,8,128,1,float16,float16,0,0.08060640096664429
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,96,8,128,1,fp8,fp8,0,0.09431999921798706
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,96,96,128,1,float16,float16,0,0.36917600631713865
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,96,96,128,1,float16,fp8,0,0.45708317756652833
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,96,1,128,1,float16,float16,0,0.022878399491310118
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,96,96,128,1,fp8,fp8,0,0.45683679580688474
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,96,1,128,1,float16,fp8,0,0.02391200065612793
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,96,1,128,1,fp8,fp8,0,0.023558400571346283
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,96,2,128,1,float16,float16,0,0.02659519910812378
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,96,2,128,1,float16,fp8,0,0.028203201293945313
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,96,2,128,1,fp8,fp8,0,0.028329598903656005
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,96,4,128,1,float16,float16,0,0.03432320058345795
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,96,4,128,1,float16,fp8,0,0.03752799928188324
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,96,96,128,1,float16,float16,0,0.1938223958015442
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,96,4,128,1,fp8,fp8,0,0.03762879967689514
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,96,1,128,1,float16,float16,0,0.021080000698566435
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,96,8,128,1,float16,float16,0,0.04879519939422607
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,96,8,128,1,float16,fp8,0,0.05564320087432861
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,96,8,128,1,fp8,fp8,0,0.055225598812103274
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,96,96,128,1,float16,fp8,0,0.2356031894683838
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,96,96,128,1,fp8,fp8,0,0.2361680030822754
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,96,1,128,1,float16,fp8,0,0.02194560021162033
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,96,1,128,1,fp8,fp8,0,0.0221328005194664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,96,2,128,1,float16,float16,0,0.021236799657344818
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,96,2,128,1,float16,fp8,0,0.02250239998102188
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,96,2,128,1,fp8,fp8,0,0.022044800221920013
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,96,4,128,1,float16,float16,0,0.024987199902534486
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,96,4,128,1,float16,fp8,0,0.026923200488090514
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,96,4,128,1,fp8,fp8,0,0.026700800657272337
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,96,8,128,1,float16,float16,0,0.03267680108547211
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,96,8,128,1,float16,fp8,0,0.03596960008144379
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,96,8,128,1,fp8,fp8,0,0.035883200168609616
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,96,96,128,1,float16,float16,0,0.10685759782791138
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,96,96,128,1,float16,fp8,0,0.1267840027809143
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,96,96,128,1,fp8,fp8,0,0.12747199535369874
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,96,1,128,1,float16,float16,0,0.020107200741767882
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,96,1,128,1,float16,fp8,0,0.020883199572563172
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,96,1,128,1,fp8,fp8,0,0.02091519981622696
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,96,2,128,1,float16,float16,0,0.020476800203323365
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,96,2,128,1,float16,fp8,0,0.02115200012922287
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,96,2,128,1,fp8,fp8,0,0.02136960029602051
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,96,4,128,1,float16,float16,0,0.020491200685501098
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,96,4,128,1,float16,fp8,0,0.02149440050125122
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,96,4,128,1,fp8,fp8,0,0.021617600321769716
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,96,8,128,1,float16,float16,0,0.024355199933052064
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,96,8,128,1,float16,fp8,0,0.02598080039024353
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,96,8,128,1,fp8,fp8,0,0.026096001267433167
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,96,96,128,1,float16,float16,0,0.06223520040512085
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,96,96,128,1,float16,fp8,0,0.0723360002040863
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,96,96,128,1,fp8,fp8,0,0.07115359902381897
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,96,1,128,1,float16,float16,0,0.019732800126075745
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,96,1,128,1,float16,fp8,0,0.020662400126457214
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,96,1,128,1,fp8,fp8,0,0.020292800664901734
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,96,2,128,1,float16,float16,0,0.019886399805545806
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,96,2,128,1,float16,fp8,0,0.020576000213623047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,96,2,128,1,fp8,fp8,0,0.020351999998092653
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,96,4,128,1,float16,float16,0,0.019920000433921815
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,96,4,128,1,float16,fp8,0,0.020420800149440765
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,96,4,128,1,fp8,fp8,0,0.020769600570201874
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,96,8,128,1,float16,float16,0,0.020319999754428865
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,96,8,128,1,float16,fp8,0,0.02112639993429184
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,96,8,128,1,fp8,fp8,0,0.0211776003241539
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,96,96,128,1,float16,float16,0,0.039556801319122314
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,96,96,128,1,float16,fp8,0,0.04395520091056824
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,96,96,128,1,fp8,fp8,0,0.0445279985666275
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,96,1,128,1,float16,float16,0,0.019388799369335175
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,96,1,128,1,float16,fp8,0,0.020020799338817598
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,96,1,128,1,fp8,fp8,0,0.020216000080108643
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,96,2,128,1,float16,float16,0,0.019232000410556793
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,96,2,128,1,float16,fp8,0,0.020467199385166168
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,96,2,128,1,fp8,fp8,0,0.020360000431537628
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,96,4,128,1,float16,float16,0,0.019270400702953338
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,96,4,128,1,float16,fp8,0,0.020369599759578704
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,96,4,128,1,fp8,fp8,0,0.02027200013399124
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,96,8,128,1,float16,float16,0,0.01969279944896698
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,96,8,128,1,float16,fp8,0,0.02081120014190674
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,96,8,128,1,fp8,fp8,0,0.020683200657367708
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,96,96,128,1,float16,float16,0,0.027980801463127137
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,96,96,128,1,float16,fp8,0,0.03015359938144684
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,96,96,128,1,fp8,fp8,0,0.029985600709915163
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,96,1,128,1,float16,float16,0,0.0190528005361557
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,96,1,128,1,float16,fp8,0,0.019683200120925903
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,96,1,128,1,fp8,fp8,0,0.019817599654197694
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,96,2,128,1,float16,float16,0,0.0192208006978035
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,96,2,128,1,float16,fp8,0,0.020113599300384522
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,96,2,128,1,fp8,fp8,0,0.019627200067043306
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,96,4,128,1,float16,float16,0,0.01942239999771118
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,96,4,128,1,float16,fp8,0,0.019950400292873382
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,96,4,128,1,fp8,fp8,0,0.02030239999294281
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,96,8,128,1,float16,float16,0,0.019334399700164796
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,96,8,128,1,float16,fp8,0,0.020139199495315552
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,96,8,128,1,fp8,fp8,0,0.020472000539302825
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,96,96,128,1,float16,float16,0,0.023398399353027344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,96,96,128,1,float16,fp8,0,0.025016000866889952
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,96,96,128,1,fp8,fp8,0,0.02512960135936737
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,96,1,128,1,float16,float16,0,0.017159999907016756
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,96,1,128,1,float16,fp8,0,0.01810719966888428
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,96,1,128,1,fp8,fp8,0,0.01813279986381531
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,96,2,128,1,float16,float16,0,0.018910400569438934
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,96,2,128,1,float16,fp8,0,0.019704000651836397
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,96,2,128,1,fp8,fp8,0,0.01961279958486557
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,96,4,128,1,float16,float16,0,0.019233599305152893
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,96,4,128,1,float16,fp8,0,0.02004159986972809
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,96,4,128,1,fp8,fp8,0,0.019811199605464937
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,96,8,128,1,float16,float16,0,0.019083200395107268
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,96,8,128,1,float16,fp8,0,0.01995519995689392
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,96,8,128,1,fp8,fp8,0,0.019844800233840942
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,96,96,128,1,float16,float16,0,0.019782400131225585
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,96,96,128,1,float16,fp8,0,0.020559999346733093
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,96,96,128,1,fp8,fp8,0,0.0208064004778862
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,96,1,128,1,float16,float16,0,0.016390399634838106
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,96,1,128,1,float16,fp8,0,0.017316800355911256
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,96,1,128,1,fp8,fp8,0,0.017347200214862822
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,96,2,128,1,float16,float16,0,0.01703999936580658
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,96,2,128,1,float16,fp8,0,0.0179967999458313
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,96,2,128,1,fp8,fp8,0,0.017844800651073457
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,96,4,128,1,float16,float16,0,0.018718400597572328
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,96,4,128,1,float16,fp8,0,0.019513599574565887
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,96,4,128,1,fp8,fp8,0,0.01964319944381714
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,96,8,128,1,float16,float16,0,0.01881439983844757
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,96,8,128,1,float16,fp8,0,0.019654400646686554
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,96,8,128,1,fp8,fp8,0,0.01964640021324158
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,64,1,128,1,float16,fp8,0,51.5600341796875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,64,1,128,1,fp8,fp8,0,52.492578125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,64,2,128,1,fp8,fp8,0,51.7817138671875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,64,2,128,1,float16,fp8,0,52.257269287109374
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,64,4,128,1,float16,fp8,0,52.475677490234375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,64,1,128,1,float16,float16,0,61.54813842773437
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,64,2,128,1,float16,float16,0,61.66318359375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,64,4,128,1,float16,float16,0,62.85950927734375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,64,64,128,1,float16,float16,0,35.793798828125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,64,64,128,1,float16,fp8,0,30.072650146484374
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,64,64,128,1,fp8,fp8,0,29.848724365234375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,64,1,128,1,float16,float16,0,31.444497680664064
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,64,4,128,1,fp8,fp8,0,51.995538330078126
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,64,8,128,1,float16,fp8,0,52.522271728515626
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,64,8,128,1,fp8,fp8,0,53.179376220703126
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,64,1,128,1,float16,fp8,0,25.7949951171875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,64,8,128,1,float16,float16,0,64.51399536132813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,64,1,128,1,fp8,fp8,0,26.03101806640625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,64,2,128,1,float16,fp8,0,26.15386962890625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,64,2,128,1,float16,float16,0,31.078829956054687
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,64,2,128,1,fp8,fp8,0,26.001675415039063
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,64,4,128,1,float16,fp8,0,26.503536987304688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,64,4,128,1,float16,float16,0,31.36376953125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,64,64,128,1,float16,float16,0,17.76111755371094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,64,4,128,1,fp8,fp8,0,26.419866943359374
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,64,8,128,1,float16,fp8,0,26.53272705078125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,64,64,128,1,float16,fp8,0,14.817410278320313
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,64,8,128,1,fp8,fp8,0,26.65020751953125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,64,8,128,1,float16,float16,0,31.754718017578124
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,64,64,128,1,fp8,fp8,0,14.950672912597657
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,64,1,128,1,float16,float16,0,15.154769897460938
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,64,1,128,1,float16,fp8,0,12.727793884277343
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,64,1,128,1,fp8,fp8,0,12.966864013671875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,64,2,128,1,float16,fp8,0,12.895805358886719
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,64,2,128,1,float16,float16,0,15.42391357421875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,64,2,128,1,fp8,fp8,0,13.138948059082031
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,64,4,128,1,float16,fp8,0,12.978509521484375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,64,4,128,1,float16,float16,0,15.822550964355468
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,64,4,128,1,fp8,fp8,0,12.898651123046875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,64,8,128,1,float16,fp8,0,13.254751586914063
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,64,64,128,1,float16,float16,0,8.7069091796875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,64,64,128,1,float16,fp8,0,7.566960144042969
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,64,64,128,1,fp8,fp8,0,7.607012939453125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,64,8,128,1,float16,float16,0,15.647509765625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,64,8,128,1,fp8,fp8,0,13.285740661621094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,64,1,128,1,float16,fp8,0,6.410150146484375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,64,1,128,1,float16,float16,0,7.5805107116699215
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,64,1,128,1,fp8,fp8,0,6.4480224609375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,64,2,128,1,float16,fp8,0,6.395748901367187
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,64,2,128,1,fp8,fp8,0,6.50015869140625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,64,2,128,1,float16,float16,0,7.60034408569336
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,64,4,128,1,float16,fp8,0,6.373404693603516
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,64,4,128,1,float16,float16,0,7.9459228515625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,64,4,128,1,fp8,fp8,0,6.396323013305664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,64,8,128,1,float16,float16,0,7.813549041748047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,64,8,128,1,float16,fp8,0,6.603572845458984
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,64,8,128,1,fp8,fp8,0,6.522430419921875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,64,1,128,1,float16,fp8,0,29.157760620117188
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,64,1,128,1,fp8,fp8,0,29.520352172851563
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,64,2,128,1,float16,fp8,0,29.682229614257814
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,64,2,128,1,fp8,fp8,0,30.105377197265625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,64,1,128,1,float16,float16,0,35.498617553710936
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,64,4,128,1,float16,fp8,0,30.318295288085938
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,64,2,128,1,float16,float16,0,35.024411010742185
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,64,4,128,1,float16,float16,0,35.83860473632812
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,64,64,128,1,float16,fp8,0,17.983726501464844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,64,64,128,1,fp8,fp8,0,17.673187255859375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,64,64,128,1,float16,float16,0,20.569175720214844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,64,1,128,1,float16,float16,0,17.51038055419922
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,64,4,128,1,fp8,fp8,0,29.560025024414063
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,64,8,128,1,float16,fp8,0,30.182864379882812
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,64,8,128,1,fp8,fp8,0,30.895382690429688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,64,8,128,1,float16,float16,0,35.75492858886719
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,64,1,128,1,float16,fp8,0,14.511769104003907
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,64,1,128,1,fp8,fp8,0,14.82794189453125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,64,2,128,1,float16,fp8,0,14.956954956054688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,64,2,128,1,float16,float16,0,17.698822021484375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,64,2,128,1,fp8,fp8,0,14.866217041015625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,64,4,128,1,float16,fp8,0,15.114286804199219
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,64,4,128,1,float16,float16,0,17.633747863769532
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,64,4,128,1,fp8,fp8,0,14.54326934814453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,64,64,128,1,float16,fp8,0,8.856712341308594
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,64,8,128,1,float16,fp8,0,14.993862915039063
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,64,64,128,1,float16,float16,0,10.408367919921876
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,64,8,128,1,float16,float16,0,18.370834350585938
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,64,1,128,1,float16,fp8,0,7.193167877197266
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,64,64,128,1,fp8,fp8,0,9.03513412475586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,64,1,128,1,float16,float16,0,8.69403533935547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,64,8,128,1,fp8,fp8,0,14.921072387695313
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,64,1,128,1,fp8,fp8,0,7.23636474609375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,64,2,128,1,float16,fp8,0,7.3716178894042965
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,64,2,128,1,float16,float16,0,9.207305908203125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,64,2,128,1,fp8,fp8,0,7.294000244140625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,64,4,128,1,float16,fp8,0,7.581196594238281
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,64,4,128,1,fp8,fp8,0,7.542208099365235
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,64,4,128,1,float16,float16,0,8.93138427734375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,64,8,128,1,float16,float16,0,8.970037078857422
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,64,64,128,1,float16,fp8,0,4.451825714111328
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,64,8,128,1,float16,fp8,0,7.419193267822266
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,64,64,128,1,float16,float16,0,5.242846298217773
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,64,64,128,1,fp8,fp8,0,4.505646514892578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,64,1,128,1,float16,float16,0,4.440353775024414
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,64,1,128,1,float16,fp8,0,3.7509647369384767
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,64,8,128,1,fp8,fp8,0,7.496630096435547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,64,1,128,1,fp8,fp8,0,3.8300544738769533
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,64,2,128,1,float16,float16,0,4.233977508544922
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,64,2,128,1,float16,fp8,0,3.6086158752441406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,64,2,128,1,fp8,fp8,0,3.7308639526367187
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,64,4,128,1,float16,fp8,0,3.734204864501953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,64,4,128,1,fp8,fp8,0,3.6426830291748047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,64,4,128,1,float16,float16,0,4.440031814575195
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,64,8,128,1,float16,fp8,0,3.703046417236328
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,64,8,128,1,float16,float16,0,4.401094436645508
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,64,8,128,1,fp8,fp8,0,3.817844772338867
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,64,1,128,1,float16,fp8,0,20.642231750488282
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,64,1,128,1,fp8,fp8,0,20.719148254394533
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,64,2,128,1,fp8,fp8,0,20.393699645996094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,64,2,128,1,float16,fp8,0,20.965130615234376
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,64,1,128,1,float16,float16,0,24.656399536132813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,64,4,128,1,float16,fp8,0,20.71949157714844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,64,2,128,1,float16,float16,0,24.294085693359374
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,64,4,128,1,float16,float16,0,25.427218627929687
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,64,64,128,1,float16,fp8,0,12.914634704589844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,64,64,128,1,fp8,fp8,0,13.160270690917969
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,64,64,128,1,float16,float16,0,14.943849182128906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,64,1,128,1,float16,float16,0,12.147054290771484
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,64,4,128,1,fp8,fp8,0,21.445623779296874
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,64,8,128,1,float16,fp8,0,21.606098937988282
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,64,8,128,1,fp8,fp8,0,21.99689483642578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,64,8,128,1,float16,float16,0,26.1642822265625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,64,1,128,1,float16,fp8,0,10.391785430908204
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,64,1,128,1,fp8,fp8,0,10.11051025390625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,64,2,128,1,float16,fp8,0,10.329927825927735
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,64,2,128,1,float16,float16,0,12.321112060546875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,64,2,128,1,fp8,fp8,0,10.372689819335937
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,64,4,128,1,float16,fp8,0,10.548983764648437
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,64,4,128,1,float16,float16,0,12.269054412841797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,64,4,128,1,fp8,fp8,0,10.454837036132812
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,64,64,128,1,float16,float16,0,7.506775665283203
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,64,8,128,1,float16,fp8,0,10.428627014160156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,64,64,128,1,float16,fp8,0,6.466441345214844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,64,8,128,1,fp8,fp8,0,10.733601379394532
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,64,8,128,1,float16,float16,0,12.80041046142578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,64,64,128,1,fp8,fp8,0,6.479431915283203
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,64,1,128,1,float16,float16,0,6.018641662597656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,64,1,128,1,float16,fp8,0,5.124422454833985
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,64,1,128,1,fp8,fp8,0,5.09039535522461
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,64,2,128,1,float16,fp8,0,5.122052764892578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,64,2,128,1,float16,float16,0,6.107619094848633
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,64,2,128,1,fp8,fp8,0,5.150897598266601
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,64,4,128,1,float16,fp8,0,5.278430557250976
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,64,4,128,1,float16,float16,0,6.177326583862305
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,64,4,128,1,fp8,fp8,0,5.157014465332031
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,64,8,128,1,float16,float16,0,6.451363372802734
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,64,64,128,1,float16,fp8,0,3.257107162475586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,64,64,128,1,float16,float16,0,3.784659194946289
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,64,8,128,1,float16,fp8,0,5.205952072143555
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,64,8,128,1,fp8,fp8,0,5.268463897705078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,64,1,128,1,float16,fp8,0,2.532823944091797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,64,1,128,1,float16,float16,0,2.925820732116699
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,64,64,128,1,fp8,fp8,0,3.267935943603516
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,64,1,128,1,fp8,fp8,0,2.6486656188964846
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,64,2,128,1,float16,fp8,0,2.5937503814697265
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,64,2,128,1,fp8,fp8,0,2.5458911895751952
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,64,2,128,1,float16,float16,0,2.8979583740234376
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,64,4,128,1,float16,fp8,0,2.567046356201172
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,64,4,128,1,float16,float16,0,2.9773216247558594
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,64,4,128,1,fp8,fp8,0,2.6447935104370117
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,64,8,128,1,float16,float16,0,3.0445600509643556
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,64,8,128,1,float16,fp8,0,2.6101247787475588
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,64,8,128,1,fp8,fp8,0,2.603566360473633
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,64,1,128,1,float16,fp8,0,26.8489501953125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,64,1,128,1,fp8,fp8,0,27.17225036621094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,64,2,128,1,float16,fp8,0,27.509405517578124
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,64,2,128,1,fp8,fp8,0,27.236312866210938
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,64,1,128,1,float16,float16,0,31.880953979492187
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,64,2,128,1,float16,float16,0,32.18802490234375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,64,4,128,1,float16,float16,0,32.548989868164064
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,64,4,128,1,float16,fp8,0,26.79544677734375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,64,64,128,1,float16,fp8,0,17.57543487548828
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,64,64,128,1,float16,float16,0,19.86346893310547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,64,64,128,1,fp8,fp8,0,17.539549255371092
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,64,1,128,1,float16,float16,0,16.72184600830078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,64,4,128,1,fp8,fp8,0,27.541815185546874
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,64,8,128,1,float16,fp8,0,28.421664428710937
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,64,8,128,1,fp8,fp8,0,28.148162841796875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,64,8,128,1,float16,float16,0,33.60964660644531
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,64,1,128,1,float16,fp8,0,13.242398071289063
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,64,1,128,1,fp8,fp8,0,13.483656311035157
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,64,2,128,1,float16,fp8,0,13.28846435546875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,64,2,128,1,float16,float16,0,15.764498901367187
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,64,2,128,1,fp8,fp8,0,13.621829223632812
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,64,4,128,1,float16,fp8,0,13.982264709472656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,64,4,128,1,float16,float16,0,17.230178833007812
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,64,4,128,1,fp8,fp8,0,13.563151550292968
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,64,64,128,1,float16,fp8,0,8.844624328613282
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,64,8,128,1,float16,fp8,0,13.744706726074218
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,64,64,128,1,float16,float16,0,10.49779815673828
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,64,8,128,1,float16,float16,0,16.885618591308592
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,64,64,128,1,fp8,fp8,0,8.844678497314453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,64,8,128,1,fp8,fp8,0,13.653791809082032
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,64,1,128,1,float16,fp8,0,6.6605377197265625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,64,1,128,1,float16,float16,0,7.9295501708984375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,64,1,128,1,fp8,fp8,0,6.661064147949219
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,64,2,128,1,float16,fp8,0,6.651172637939453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,64,2,128,1,float16,float16,0,8.013735961914062
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,64,2,128,1,fp8,fp8,0,6.6751251220703125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,64,4,128,1,float16,fp8,0,6.713549041748047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,64,4,128,1,fp8,fp8,0,6.796222686767578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,64,4,128,1,float16,float16,0,8.08414535522461
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,64,8,128,1,float16,float16,0,8.26768798828125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,64,8,128,1,float16,fp8,0,6.91009750366211
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,64,64,128,1,float16,float16,0,4.927297592163086
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,64,64,128,1,float16,fp8,0,4.41926383972168
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,64,1,128,1,float16,fp8,0,3.382891082763672
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,64,1,128,1,float16,float16,0,3.9499439239501952
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,64,64,128,1,fp8,fp8,0,4.432566452026367
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,64,8,128,1,fp8,fp8,0,6.830249786376953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,64,1,128,1,fp8,fp8,0,3.4056190490722655
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,64,2,128,1,float16,float16,0,3.77166862487793
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,64,2,128,1,float16,fp8,0,3.3516639709472655
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,64,2,128,1,fp8,fp8,0,3.3233055114746093
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,64,4,128,1,float16,float16,0,3.9570655822753906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,64,4,128,1,fp8,fp8,0,3.350507354736328
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,64,4,128,1,float16,fp8,0,3.4102142333984373
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,64,8,128,1,float16,float16,0,3.956980895996094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,64,8,128,1,float16,fp8,0,3.4748561859130858
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,64,64,128,1,float16,float16,0,2.370148849487305
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,64,64,128,1,float16,fp8,0,2.2256000518798826
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,64,1,128,1,float16,float16,0,1.7982528686523438
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,64,1,128,1,float16,fp8,0,1.7014944076538085
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,64,64,128,1,fp8,fp8,0,2.3159423828125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,64,1,128,1,fp8,fp8,0,1.7575567245483399
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,64,8,128,1,fp8,fp8,0,3.6631023406982424
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,64,2,128,1,float16,float16,0,1.8538768768310547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,64,2,128,1,float16,fp8,0,1.7035919189453126
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,64,2,128,1,fp8,fp8,0,1.6688335418701172
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,64,4,128,1,float16,float16,0,1.8675296783447266
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,64,4,128,1,float16,fp8,0,1.682699203491211
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,64,4,128,1,fp8,fp8,0,1.682035255432129
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,64,8,128,1,float16,float16,0,1.8538015365600586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,64,8,128,1,float16,fp8,0,1.7108816146850585
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,64,8,128,1,fp8,fp8,0,1.805900764465332
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,64,1,128,1,float16,fp8,0,15.264411926269531
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,64,1,128,1,fp8,fp8,0,15.246849060058594
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,64,2,128,1,float16,fp8,0,15.660462951660156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,64,2,128,1,fp8,fp8,0,15.3948974609375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,64,1,128,1,float16,float16,0,17.99882354736328
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,64,4,128,1,float16,fp8,0,15.973263549804688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,64,2,128,1,float16,float16,0,18.407321166992187
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,64,4,128,1,float16,float16,0,18.758198547363282
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,64,1,128,1,float16,float16,0,9.151438140869141
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,64,64,128,1,float16,fp8,0,10.971644592285156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,64,64,128,1,float16,float16,0,12.490499114990234
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,64,64,128,1,fp8,fp8,0,10.981924438476563
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,64,4,128,1,fp8,fp8,0,15.836630249023438
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,64,8,128,1,float16,fp8,0,16.52178039550781
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,64,8,128,1,fp8,fp8,0,16.087112426757812
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,64,8,128,1,float16,float16,0,19.66417694091797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,64,1,128,1,float16,fp8,0,7.794891357421875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,64,1,128,1,fp8,fp8,0,7.9271484375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,64,2,128,1,float16,fp8,0,7.891754913330078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,64,2,128,1,fp8,fp8,0,7.8084770202636715
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,64,2,128,1,float16,float16,0,9.313330841064452
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,64,4,128,1,float16,fp8,0,7.82657470703125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,64,4,128,1,float16,float16,0,9.593504333496094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,64,4,128,1,fp8,fp8,0,7.824684906005859
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,64,64,128,1,float16,fp8,0,5.544996643066407
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,64,64,128,1,float16,float16,0,6.046811294555664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,64,8,128,1,float16,fp8,0,8.112413024902343
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,64,8,128,1,float16,float16,0,9.482510375976563
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,64,8,128,1,fp8,fp8,0,8.064486694335937
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,64,64,128,1,fp8,fp8,0,5.498031997680664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,64,1,128,1,float16,float16,0,4.378263854980469
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,64,1,128,1,float16,fp8,0,3.815705490112305
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,64,1,128,1,fp8,fp8,0,3.8485504150390626
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,64,2,128,1,float16,float16,0,4.536908721923828
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,64,2,128,1,fp8,fp8,0,3.8689998626708983
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,64,2,128,1,float16,fp8,0,4.016185760498047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,64,4,128,1,float16,float16,0,4.519038391113281
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,64,4,128,1,float16,fp8,0,3.9774913787841797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,64,4,128,1,fp8,fp8,0,3.8946704864501953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,64,64,128,1,float16,float16,0,3.0119535446166994
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,64,64,128,1,float16,fp8,0,2.7537408828735352
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,64,1,128,1,float16,float16,0,2.073736000061035
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,64,8,128,1,float16,fp8,0,4.004483032226562
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,64,8,128,1,float16,float16,0,4.652308654785156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,64,64,128,1,fp8,fp8,0,2.8289791107177735
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,64,8,128,1,fp8,fp8,0,4.134891128540039
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,64,1,128,1,float16,fp8,0,1.9252592086791993
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,64,1,128,1,fp8,fp8,0,1.9644336700439453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,64,2,128,1,float16,float16,0,2.1287439346313475
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,64,2,128,1,float16,fp8,0,2.0079023361206056
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,64,2,128,1,fp8,fp8,0,1.9342111587524413
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,64,4,128,1,float16,float16,0,2.1457151412963866
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,64,4,128,1,float16,fp8,0,1.9591360092163086
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,64,4,128,1,fp8,fp8,0,1.9828655242919921
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,64,8,128,1,float16,float16,0,2.2106464385986326
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,64,8,128,1,float16,fp8,0,2.010532760620117
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,64,64,128,1,float16,float16,0,1.4913295745849608
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,64,64,128,1,float16,fp8,0,1.3930720329284667
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,64,1,128,1,float16,float16,0,1.0124336242675782
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,64,8,128,1,fp8,fp8,0,2.0939119338989256
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,64,1,128,1,float16,fp8,0,0.9872639656066895
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,64,64,128,1,fp8,fp8,0,1.4644816398620606
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,64,1,128,1,fp8,fp8,0,1.0469440460205077
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,64,2,128,1,float16,float16,0,1.0482687950134277
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,64,2,128,1,float16,fp8,0,0.9957247734069824
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,64,2,128,1,fp8,fp8,0,0.9758367538452148
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,64,4,128,1,float16,float16,0,1.0462384223937988
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,64,4,128,1,float16,fp8,0,0.9954239845275878
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,64,4,128,1,fp8,fp8,0,0.9941295623779297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,64,8,128,1,float16,float16,0,1.0883135795593262
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,64,8,128,1,fp8,fp8,0,1.1296095848083496
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,64,8,128,1,float16,fp8,0,1.0252351760864258
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,64,1,128,1,float16,fp8,0,14.488494873046875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,64,1,128,1,fp8,fp8,0,14.296243286132812
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,64,2,128,1,float16,fp8,0,14.746343994140625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,64,2,128,1,fp8,fp8,0,14.4739501953125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,64,4,128,1,float16,fp8,0,14.721124267578125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,64,1,128,1,float16,float16,0,16.746726989746094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,64,2,128,1,float16,float16,0,17.068292236328126
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,64,4,128,1,float16,float16,0,17.541493225097657
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,64,1,128,1,float16,float16,0,8.541948699951172
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,64,64,128,1,float16,fp8,0,11.662509155273437
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,64,64,128,1,float16,float16,0,12.56164321899414
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,64,64,128,1,fp8,fp8,0,11.592427062988282
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,64,4,128,1,fp8,fp8,0,14.920529174804688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,64,8,128,1,float16,fp8,0,15.575395202636718
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,64,8,128,1,fp8,fp8,0,15.250457763671875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,64,8,128,1,float16,float16,0,17.955477905273437
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,64,1,128,1,float16,fp8,0,7.357713317871093
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,64,1,128,1,fp8,fp8,0,7.102001953125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,64,2,128,1,float16,fp8,0,7.262217712402344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,64,2,128,1,fp8,fp8,0,7.722617340087891
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,64,2,128,1,float16,float16,0,8.665137481689452
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,64,4,128,1,float16,fp8,0,7.5030464172363285
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,64,4,128,1,fp8,fp8,0,7.4638816833496096
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,64,4,128,1,float16,float16,0,9.528711700439453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,64,8,128,1,float16,float16,0,8.971260833740235
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,64,8,128,1,float16,fp8,0,7.583998107910157
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,64,64,128,1,float16,fp8,0,5.904140853881836
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,64,64,128,1,float16,float16,0,6.284710311889649
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,64,1,128,1,float16,float16,0,4.052215957641602
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,64,1,128,1,float16,fp8,0,3.790180969238281
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,64,8,128,1,fp8,fp8,0,7.684088134765625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,64,64,128,1,fp8,fp8,0,5.833422470092773
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,64,1,128,1,fp8,fp8,0,3.6403488159179687
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,64,2,128,1,float16,fp8,0,3.63720817565918
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,64,2,128,1,float16,float16,0,4.119675064086914
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,64,2,128,1,fp8,fp8,0,3.643377685546875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,64,4,128,1,float16,fp8,0,3.7178688049316406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,64,4,128,1,float16,float16,0,4.202420806884765
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,64,4,128,1,fp8,fp8,0,3.684048080444336
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,64,8,128,1,float16,float16,0,4.377737426757813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,64,1,128,1,float16,fp8,0,1.7959455490112304
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,64,1,128,1,float16,float16,0,1.9796607971191407
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,64,64,128,1,float16,float16,0,3.106043243408203
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,64,64,128,1,float16,fp8,0,2.933955192565918
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,64,8,128,1,float16,fp8,0,3.8380191802978514
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,64,64,128,1,fp8,fp8,0,2.947492790222168
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,64,1,128,1,fp8,fp8,0,1.8962848663330079
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,64,8,128,1,fp8,fp8,0,3.8347103118896486
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,64,2,128,1,float16,float16,0,1.9940959930419921
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,64,2,128,1,float16,fp8,0,1.947123146057129
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,64,2,128,1,fp8,fp8,0,1.8616367340087892
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,64,4,128,1,float16,float16,0,2.0365503311157225
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,64,4,128,1,float16,fp8,0,1.86602725982666
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,64,4,128,1,fp8,fp8,0,1.861137580871582
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,64,8,128,1,float16,float16,0,2.0756256103515627
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,64,8,128,1,float16,fp8,0,1.9165424346923827
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,64,64,128,1,float16,float16,0,1.5340335845947266
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,64,8,128,1,fp8,fp8,0,2.0161344528198244
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,64,1,128,1,float16,float16,0,0.967409610748291
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,64,64,128,1,float16,fp8,0,1.5497039794921874
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,64,64,128,1,fp8,fp8,0,1.4850255966186523
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,64,1,128,1,float16,fp8,0,1.0104975700378418
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,64,1,128,1,fp8,fp8,0,0.9151136398315429
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,64,2,128,1,float16,float16,0,0.9601008415222168
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,64,2,128,1,float16,fp8,0,0.9332320213317871
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,64,2,128,1,fp8,fp8,0,0.9450032234191894
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,64,4,128,1,float16,float16,0,1.0018272399902344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,64,4,128,1,float16,fp8,0,0.9456480026245118
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,64,4,128,1,fp8,fp8,0,0.9501728057861328
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,64,8,128,1,float16,float16,0,1.0245823860168457
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,64,8,128,1,float16,fp8,0,0.977449607849121
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,64,64,128,1,float16,fp8,0,0.7778480052947998
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,64,8,128,1,fp8,fp8,0,0.9688400268554688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,64,1,128,1,float16,float16,0,0.49950718879699707
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,64,64,128,1,float16,float16,0,0.7882559776306153
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,64,64,128,1,fp8,fp8,0,0.7548736095428467
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,64,1,128,1,float16,fp8,0,0.479420804977417
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,64,1,128,1,fp8,fp8,0,0.4741343975067139
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,64,2,128,1,float16,float16,0,0.5112607955932618
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,64,2,128,1,float16,fp8,0,0.4743408203125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,64,2,128,1,fp8,fp8,0,0.4758863925933838
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,64,4,128,1,float16,float16,0,0.5076303958892823
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,64,4,128,1,float16,fp8,0,0.48711199760437013
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,64,4,128,1,fp8,fp8,0,0.4850895881652832
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,64,8,128,1,float16,float16,0,0.5181680202484131
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,64,8,128,1,float16,fp8,0,0.5002272129058838
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,64,8,128,1,fp8,fp8,0,0.5002463817596435
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,64,1,128,1,float16,fp8,0,8.587940979003907
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,64,1,128,1,fp8,fp8,0,8.503457641601562
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,64,2,128,1,float16,fp8,0,8.622608184814453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,64,2,128,1,fp8,fp8,0,8.558792114257812
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,64,1,128,1,float16,float16,0,9.8160400390625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,64,4,128,1,float16,fp8,0,8.813276672363282
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,64,2,128,1,float16,float16,0,9.803511810302734
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,64,4,128,1,float16,float16,0,10.232252502441407
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,64,1,128,1,float16,float16,0,4.864828872680664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,64,4,128,1,fp8,fp8,0,8.904555511474609
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,64,64,128,1,float16,fp8,0,7.618681335449219
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,64,64,128,1,float16,float16,0,8.053103637695312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,64,64,128,1,fp8,fp8,0,7.68054428100586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,64,8,128,1,float16,fp8,0,9.178982543945313
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,64,8,128,1,fp8,fp8,0,9.169564819335937
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,64,8,128,1,float16,float16,0,10.646955108642578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,64,1,128,1,float16,fp8,0,4.2846942901611325
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,64,1,128,1,fp8,fp8,0,4.289255905151367
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,64,2,128,1,float16,fp8,0,4.299750518798828
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,64,2,128,1,float16,float16,0,4.842918395996094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,64,2,128,1,fp8,fp8,0,4.323455810546875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,64,4,128,1,float16,fp8,0,4.3904064178466795
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,64,4,128,1,float16,float16,0,4.888441467285157
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,64,4,128,1,fp8,fp8,0,4.53454704284668
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,64,8,128,1,float16,float16,0,5.260606384277343
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,64,1,128,1,float16,float16,0,2.340355110168457
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,64,1,128,1,float16,fp8,0,2.1402000427246093
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,64,8,128,1,float16,fp8,0,4.655486297607422
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,64,64,128,1,float16,fp8,0,3.812887954711914
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,64,64,128,1,float16,float16,0,4.082708740234375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,64,64,128,1,fp8,fp8,0,3.8142673492431642
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,64,8,128,1,fp8,fp8,0,4.6159919738769535
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,64,1,128,1,fp8,fp8,0,2.199246406555176
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,64,2,128,1,float16,fp8,0,2.170342445373535
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,64,2,128,1,float16,float16,0,2.351608085632324
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,64,2,128,1,fp8,fp8,0,2.17095832824707
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,64,4,128,1,float16,float16,0,2.31418399810791
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,64,4,128,1,float16,fp8,0,2.223923110961914
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,64,4,128,1,fp8,fp8,0,2.2080991744995115
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,64,8,128,1,float16,float16,0,2.494932746887207
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,64,8,128,1,float16,fp8,0,2.3158416748046875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,64,64,128,1,float16,float16,0,2.0264015197753906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,64,1,128,1,float16,float16,0,1.1305999755859375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,64,8,128,1,fp8,fp8,0,2.325356864929199
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,64,1,128,1,float16,fp8,0,1.0771455764770508
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,64,64,128,1,float16,fp8,0,1.9334623336791992
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,64,64,128,1,fp8,fp8,0,1.9855535507202149
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,64,1,128,1,fp8,fp8,0,1.15065279006958
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,64,2,128,1,float16,float16,0,1.149625587463379
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,64,2,128,1,float16,fp8,0,1.103105640411377
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,64,2,128,1,fp8,fp8,0,1.1286751747131347
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,64,4,128,1,float16,float16,0,1.189948844909668
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,64,4,128,1,float16,fp8,0,1.117199993133545
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,64,4,128,1,fp8,fp8,0,1.1210368156433106
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,64,8,128,1,float16,fp8,0,1.17401123046875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,64,8,128,1,float16,float16,0,1.2212016105651855
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,64,8,128,1,fp8,fp8,0,1.1916496276855468
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,64,64,128,1,float16,float16,0,1.0128095626831055
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,64,1,128,1,float16,fp8,0,0.5531807899475097
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,64,64,128,1,float16,fp8,0,0.9766960144042969
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,64,1,128,1,float16,float16,0,0.5706831932067871
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,64,2,128,1,float16,fp8,0,0.5590288162231445
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,64,64,128,1,fp8,fp8,0,0.9985119819641113
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,64,1,128,1,fp8,fp8,0,0.553872013092041
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,64,2,128,1,float16,float16,0,0.5920271873474121
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,64,2,128,1,fp8,fp8,0,0.5670335769653321
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,64,4,128,1,float16,float16,0,0.5989007949829102
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,64,4,128,1,float16,fp8,0,0.5702000141143799
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,64,4,128,1,fp8,fp8,0,0.5725520133972168
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,64,8,128,1,float16,float16,0,0.6243408203125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,64,8,128,1,float16,fp8,0,0.5998576164245606
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,64,8,128,1,fp8,fp8,0,0.6106607913970947
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,64,64,128,1,float16,float16,0,0.5255216121673584
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,64,64,128,1,float16,fp8,0,0.5013247966766358
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,64,64,128,1,fp8,fp8,0,0.5012351989746093
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,64,1,128,1,float16,float16,0,0.3000767946243286
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,64,1,128,1,float16,fp8,0,0.29067680835723875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,64,1,128,1,fp8,fp8,0,0.29270880222320556
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,64,2,128,1,float16,float16,0,0.3042128086090088
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,64,2,128,1,float16,fp8,0,0.2933504104614258
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,64,2,128,1,fp8,fp8,0,0.2906464099884033
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,64,4,128,1,float16,float16,0,0.31218719482421875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,64,4,128,1,float16,fp8,0,0.3010128021240234
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,64,4,128,1,fp8,fp8,0,0.2998703956604004
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,64,8,128,1,float16,float16,0,0.3241791963577271
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,64,8,128,1,float16,fp8,0,0.31487360000610354
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,64,8,128,1,fp8,fp8,0,0.31500959396362305
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,64,1,128,1,float16,fp8,0,8.33671875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,64,1,128,1,fp8,fp8,0,8.305001831054687
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,64,1,128,1,float16,float16,0,9.544966125488282
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,64,2,128,1,float16,fp8,0,8.500392150878906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,64,2,128,1,fp8,fp8,0,8.481919860839843
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,64,2,128,1,float16,float16,0,9.653020477294922
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,64,4,128,1,float16,float16,0,9.78019027709961
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,64,4,128,1,float16,fp8,0,8.715801239013672
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,64,4,128,1,fp8,fp8,0,8.743144226074218
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,64,1,128,1,float16,float16,0,4.465039825439453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,64,8,128,1,float16,fp8,0,9.335775756835938
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,64,64,128,1,float16,float16,0,8.91656494140625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,64,8,128,1,fp8,fp8,0,9.293804931640626
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,64,8,128,1,float16,float16,0,10.364987182617188
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,64,64,128,1,float16,fp8,0,8.667958068847657
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,64,64,128,1,fp8,fp8,0,8.686353302001953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,64,1,128,1,float16,fp8,0,4.158824157714844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,64,1,128,1,fp8,fp8,0,4.210855865478516
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,64,2,128,1,float16,fp8,0,4.275475311279297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,64,2,128,1,float16,float16,0,4.789478302001953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,64,2,128,1,fp8,fp8,0,4.239783859252929
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,64,4,128,1,float16,fp8,0,4.359820938110351
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,64,4,128,1,float16,float16,0,4.867641448974609
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,64,4,128,1,fp8,fp8,0,4.398499298095703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,64,1,128,1,float16,float16,0,2.282689666748047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,64,8,128,1,float16,float16,0,5.139595031738281
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,64,1,128,1,float16,fp8,0,2.092252731323242
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,64,8,128,1,float16,fp8,0,4.686025619506836
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,64,8,128,1,fp8,fp8,0,4.696166229248047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,64,64,128,1,float16,float16,0,4.5172992706298825
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,64,64,128,1,float16,fp8,0,4.3307441711425785
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,64,64,128,1,fp8,fp8,0,4.313832092285156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,64,1,128,1,fp8,fp8,0,2.1566511154174806
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,64,2,128,1,float16,fp8,0,2.1267488479614256
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,64,2,128,1,float16,float16,0,2.301740837097168
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,64,2,128,1,fp8,fp8,0,2.151678466796875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,64,4,128,1,float16,float16,0,2.3410400390625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,64,4,128,1,float16,fp8,0,2.210286331176758
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,64,4,128,1,fp8,fp8,0,2.203638458251953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,64,8,128,1,float16,float16,0,2.545684814453125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,64,8,128,1,float16,fp8,0,2.3587295532226564
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,64,8,128,1,fp8,fp8,0,2.3321887969970705
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,64,1,128,1,float16,float16,0,1.1081456184387206
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,64,1,128,1,float16,fp8,0,1.0637184143066407
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,64,64,128,1,float16,float16,0,2.280617523193359
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,64,64,128,1,float16,fp8,0,2.1910160064697264
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,64,1,128,1,fp8,fp8,0,1.12161283493042
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,64,64,128,1,fp8,fp8,0,2.230289649963379
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,64,2,128,1,float16,float16,0,1.120967960357666
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,64,2,128,1,float16,fp8,0,1.0754608154296874
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,64,2,128,1,fp8,fp8,0,1.0871567726135254
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,64,4,128,1,float16,float16,0,1.1576592445373535
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,64,4,128,1,float16,fp8,0,1.1103792190551758
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,64,4,128,1,fp8,fp8,0,1.1116368293762207
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,64,8,128,1,float16,float16,0,1.2502991676330566
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,64,8,128,1,float16,fp8,0,1.1935263633728028
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,64,8,128,1,fp8,fp8,0,1.2045840263366698
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,64,64,128,1,float16,float16,0,1.127023983001709
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,64,1,128,1,float16,float16,0,0.5612336158752441
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,64,64,128,1,float16,fp8,0,1.1112879753112792
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,64,1,128,1,float16,fp8,0,0.5449423789978027
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,64,64,128,1,fp8,fp8,0,1.1362144470214843
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,64,1,128,1,fp8,fp8,0,0.5824304103851319
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,64,2,128,1,float16,float16,0,0.5731264114379883
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,64,2,128,1,float16,fp8,0,0.5575439929962158
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,64,2,128,1,fp8,fp8,0,0.5500671863555908
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,64,4,128,1,float16,float16,0,0.5887919902801514
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,64,4,128,1,float16,fp8,0,0.5724224090576172
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,64,4,128,1,fp8,fp8,0,0.5687168121337891
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,64,8,128,1,float16,float16,0,0.6300831794738769
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,64,8,128,1,float16,fp8,0,0.6128352165222168
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,64,8,128,1,fp8,fp8,0,0.6088992118835449
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,64,64,128,1,float16,float16,0,0.582476806640625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,64,64,128,1,float16,fp8,0,0.5642655849456787
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,64,1,128,1,float16,float16,0,0.291977596282959
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,64,64,128,1,fp8,fp8,0,0.5677343845367432
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,64,1,128,1,float16,fp8,0,0.2847104072570801
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,64,1,128,1,fp8,fp8,0,0.28583199977874757
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,64,2,128,1,float16,float16,0,0.29892959594726565
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,64,2,128,1,float16,fp8,0,0.29159040451049806
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,64,2,128,1,fp8,fp8,0,0.29122400283813477
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,64,4,128,1,float16,float16,0,0.30599040985107423
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,64,4,128,1,float16,fp8,0,0.2993664026260376
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,64,4,128,1,fp8,fp8,0,0.2981328010559082
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,64,8,128,1,float16,float16,0,0.32778239250183105
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,64,8,128,1,float16,fp8,0,0.3168463945388794
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,64,8,128,1,fp8,fp8,0,0.31668479442596437
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,64,64,128,1,float16,float16,0,0.3052880048751831
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,64,64,128,1,float16,fp8,0,0.294486403465271
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,64,64,128,1,fp8,fp8,0,0.29602560997009275
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,64,1,128,1,float16,float16,0,0.15944160223007203
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,64,1,128,1,float16,fp8,0,0.15697280168533326
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,64,4,128,1,float16,float16,0,0.16492639780044555
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,64,1,128,1,fp8,fp8,0,0.1557152032852173
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,64,2,128,1,float16,float16,0,0.16107840538024903
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,64,2,128,1,float16,fp8,0,0.1569375991821289
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,64,2,128,1,fp8,fp8,0,0.15537760257720948
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,64,4,128,1,float16,fp8,0,0.1624559998512268
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,64,4,128,1,fp8,fp8,0,0.16354559659957885
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,64,8,128,1,float16,float16,0,0.17316319942474365
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,64,8,128,1,float16,fp8,0,0.17019360065460204
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,64,8,128,1,fp8,fp8,0,0.17029919624328613
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,64,1,128,1,float16,fp8,0,5.1519920349121096
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,64,1,128,1,float16,float16,0,5.4951904296875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,64,1,128,1,fp8,fp8,0,5.136380767822265
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,64,2,128,1,float16,fp8,0,5.2649391174316404
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,64,2,128,1,fp8,fp8,0,5.228176116943359
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,64,2,128,1,float16,float16,0,5.68070068359375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,64,4,128,1,float16,float16,0,5.935171127319336
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,64,4,128,1,float16,fp8,0,5.468713760375977
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,64,1,128,1,float16,float16,0,2.7565792083740233
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,64,4,128,1,fp8,fp8,0,5.4543407440185545
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,64,8,128,1,float16,fp8,0,5.858819198608399
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,64,8,128,1,float16,float16,0,6.343403244018555
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,64,8,128,1,fp8,fp8,0,5.882052612304688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,64,64,128,1,float16,float16,0,6.098382568359375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,64,64,128,1,float16,fp8,0,5.9961200714111325
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,64,64,128,1,fp8,fp8,0,5.9904014587402346
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,64,1,128,1,float16,fp8,0,2.651755142211914
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,64,1,128,1,fp8,fp8,0,2.5834720611572264
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,64,2,128,1,float16,float16,0,2.6827999114990235
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,64,2,128,1,float16,fp8,0,2.6773008346557616
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,64,2,128,1,fp8,fp8,0,2.6333200454711916
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,64,4,128,1,float16,float16,0,2.9374351501464844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,64,4,128,1,float16,fp8,0,2.7637775421142576
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,64,4,128,1,fp8,fp8,0,2.7477951049804688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,64,8,128,1,float16,float16,0,3.1308368682861327
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,64,8,128,1,float16,fp8,0,2.9497392654418944
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,64,1,128,1,float16,float16,0,1.3511743545532227
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,64,1,128,1,float16,fp8,0,1.306783962249756
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,64,8,128,1,fp8,fp8,0,3.0216096878051757
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,64,1,128,1,fp8,fp8,0,1.3507391929626464
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,64,64,128,1,float16,float16,0,3.0431455612182616
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,64,64,128,1,float16,fp8,0,2.993601608276367
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,64,2,128,1,float16,float16,0,1.3696784019470214
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,64,64,128,1,fp8,fp8,0,3.0694623947143556
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,64,2,128,1,float16,fp8,0,1.3406047821044922
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,64,2,128,1,fp8,fp8,0,1.3344063758850098
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,64,4,128,1,float16,float16,0,1.4315024375915528
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,64,4,128,1,float16,fp8,0,1.3925567626953126
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,64,4,128,1,fp8,fp8,0,1.3880111694335937
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,64,8,128,1,float16,float16,0,1.5317791938781737
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,64,8,128,1,float16,fp8,0,1.497036838531494
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,64,8,128,1,fp8,fp8,0,1.492591953277588
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,64,1,128,1,float16,float16,0,0.6830448150634766
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,64,64,128,1,float16,float16,0,1.53920316696167
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,64,64,128,1,float16,fp8,0,1.5077296257019044
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,64,1,128,1,float16,fp8,0,0.6665008068084717
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,64,2,128,1,float16,fp8,0,0.6807151794433594
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,64,1,128,1,fp8,fp8,0,0.6915247917175293
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,64,64,128,1,fp8,fp8,0,1.5296208381652832
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,64,2,128,1,float16,float16,0,0.6958799839019776
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,64,2,128,1,fp8,fp8,0,0.6822944164276123
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,64,4,128,1,float16,float16,0,0.726966381072998
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,64,4,128,1,float16,fp8,0,0.697979211807251
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,64,4,128,1,fp8,fp8,0,0.7069488048553467
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,64,8,128,1,float16,float16,0,0.7775328159332275
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,64,8,128,1,float16,fp8,0,0.7594064235687256
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,64,8,128,1,fp8,fp8,0,0.760423994064331
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,64,64,128,1,float16,float16,0,0.7839439868927002
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,64,64,128,1,float16,fp8,0,0.7735167980194092
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,64,1,128,1,float16,float16,0,0.3486000061035156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,64,64,128,1,fp8,fp8,0,0.7737648010253906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,64,1,128,1,float16,fp8,0,0.3469871997833252
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,64,1,128,1,fp8,fp8,0,0.3448031902313232
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,64,2,128,1,float16,float16,0,0.3592159986495972
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,64,2,128,1,float16,fp8,0,0.35336320400238036
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,64,2,128,1,fp8,fp8,0,0.3524688005447388
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,64,4,128,1,float16,float16,0,0.3729439973831177
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,64,4,128,1,float16,fp8,0,0.3607791900634766
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,64,4,128,1,fp8,fp8,0,0.36713759899139403
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,64,8,128,1,float16,float16,0,0.3981503963470459
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,64,64,128,1,fp8,fp8,0,0.4001904010772705
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,64,8,128,1,float16,fp8,0,0.39174718856811525
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,64,8,128,1,fp8,fp8,0,0.39208159446716306
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,64,64,128,1,float16,float16,0,0.4077807903289795
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,64,64,128,1,float16,fp8,0,0.3996464014053345
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,64,1,128,1,float16,float16,0,0.1876479983329773
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,64,1,128,1,float16,fp8,0,0.18664640188217163
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,64,1,128,1,fp8,fp8,0,0.18378080129623414
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,64,2,128,1,float16,float16,0,0.18993760347366334
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,64,2,128,1,float16,fp8,0,0.18451839685440063
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,64,2,128,1,fp8,fp8,0,0.18735359907150267
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,64,4,128,1,float16,float16,0,0.19713280200958253
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,64,4,128,1,float16,fp8,0,0.19205280542373657
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,64,4,128,1,fp8,fp8,0,0.19426560401916504
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,64,8,128,1,float16,float16,0,0.21090080738067626
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,64,8,128,1,float16,fp8,0,0.20801119804382323
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,64,8,128,1,fp8,fp8,0,0.20621440410614014
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,64,64,128,1,float16,float16,0,0.21658720970153808
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,64,64,128,1,float16,fp8,0,0.21311039924621583
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,64,64,128,1,fp8,fp8,0,0.21108479499816896
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,64,1,128,1,float16,float16,0,0.1063088059425354
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,64,2,128,1,fp8,fp8,0,0.10052640438079834
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,64,1,128,1,float16,fp8,0,0.10027999877929687
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,64,1,128,1,fp8,fp8,0,0.10078719854354859
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,64,2,128,1,float16,float16,0,0.10675519704818726
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,64,2,128,1,float16,fp8,0,0.1016975998878479
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,64,4,128,1,float16,float16,0,0.10871039628982544
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,64,4,128,1,float16,fp8,0,0.10361920595169068
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,64,8,128,1,fp8,fp8,0,0.11312320232391357
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,64,4,128,1,fp8,fp8,0,0.1036911964416504
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,64,8,128,1,float16,float16,0,0.11669119596481323
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,64,8,128,1,float16,fp8,0,0.11139359474182128
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,64,1,128,1,float16,fp8,0,5.341675186157227
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,64,1,128,1,float16,float16,0,5.693611145019531
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,64,1,128,1,fp8,fp8,0,5.385022354125977
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,64,2,128,1,float16,float16,0,5.844500732421875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,64,2,128,1,float16,fp8,0,5.528601455688476
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,64,2,128,1,fp8,fp8,0,5.547329711914062
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,64,4,128,1,float16,float16,0,6.0675712585449215
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,64,4,128,1,float16,fp8,0,5.8090160369873045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,64,4,128,1,fp8,fp8,0,5.782588958740234
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,64,8,128,1,float16,float16,0,6.708869171142578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,64,8,128,1,float16,fp8,0,6.386676788330078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,64,8,128,1,fp8,fp8,0,6.446067047119141
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,64,1,128,1,float16,float16,0,2.7789567947387694
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,64,64,128,1,float16,float16,0,7.293224334716797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,64,64,128,1,float16,fp8,0,7.210990142822266
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,64,64,128,1,fp8,fp8,0,7.250872039794922
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,64,1,128,1,float16,fp8,0,2.702854347229004
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,64,1,128,1,fp8,fp8,0,2.7391647338867187
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,64,2,128,1,float16,float16,0,2.865100860595703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,64,2,128,1,float16,fp8,0,2.7910255432128905
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,64,2,128,1,fp8,fp8,0,2.807153511047363
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,64,4,128,1,float16,float16,0,2.9816959381103514
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,64,4,128,1,float16,fp8,0,2.9252031326293944
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,64,4,128,1,fp8,fp8,0,2.943729591369629
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,64,8,128,1,float16,float16,0,3.3258129119873048
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,64,8,128,1,float16,fp8,0,3.219468688964844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,64,1,128,1,float16,float16,0,1.3781375885009766
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,64,1,128,1,float16,fp8,0,1.3790063858032227
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,64,8,128,1,fp8,fp8,0,3.246750259399414
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,64,1,128,1,fp8,fp8,0,1.3901503562927247
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,64,64,128,1,float16,float16,0,3.6703311920166017
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,64,64,128,1,float16,fp8,0,3.6422496795654298
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,64,2,128,1,float16,float16,0,1.425774383544922
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,64,2,128,1,float16,fp8,0,1.4009663581848144
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,64,64,128,1,fp8,fp8,0,3.6532703399658204
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,64,2,128,1,fp8,fp8,0,1.3983839988708495
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,64,4,128,1,float16,float16,0,1.4938847541809082
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,64,4,128,1,float16,fp8,0,1.4721983909606933
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,64,4,128,1,fp8,fp8,0,1.466222381591797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,64,8,128,1,float16,float16,0,1.648784065246582
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,64,8,128,1,float16,fp8,0,1.6260431289672852
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,64,1,128,1,float16,float16,0,0.6997168064117432
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,64,8,128,1,fp8,fp8,0,1.6122447967529296
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,64,1,128,1,float16,fp8,0,0.6927231788635254
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,64,64,128,1,float16,float16,0,1.8473760604858398
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,64,1,128,1,fp8,fp8,0,0.704256010055542
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,64,64,128,1,float16,fp8,0,1.8234479904174805
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,64,64,128,1,fp8,fp8,0,1.8305728912353516
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,64,2,128,1,float16,float16,0,0.7185215950012207
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,64,2,128,1,float16,fp8,0,0.7134287834167481
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,64,2,128,1,fp8,fp8,0,0.714955186843872
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,64,4,128,1,float16,float16,0,0.7492400169372558
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,64,4,128,1,float16,fp8,0,0.7509424209594726
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,64,4,128,1,fp8,fp8,0,0.7518784046173096
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,64,8,128,1,float16,float16,0,0.8280495643615723
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,64,8,128,1,float16,fp8,0,0.822590446472168
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,64,8,128,1,fp8,fp8,0,0.8194416046142579
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,64,64,128,1,fp8,fp8,0,0.9219023704528808
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,64,64,128,1,float16,float16,0,0.9386752128601075
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,64,1,128,1,float16,float16,0,0.36354401111602785
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,64,64,128,1,float16,fp8,0,0.9244288444519043
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,64,1,128,1,float16,fp8,0,0.36374878883361816
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,64,1,128,1,fp8,fp8,0,0.36018240451812744
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,64,2,128,1,float16,float16,0,0.37332639694213865
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,64,2,128,1,float16,fp8,0,0.3712575912475586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,64,2,128,1,fp8,fp8,0,0.3667759895324707
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,64,4,128,1,float16,float16,0,0.38909919261932374
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,64,4,128,1,float16,fp8,0,0.38761439323425295
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,64,4,128,1,fp8,fp8,0,0.3856719970703125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,64,8,128,1,float16,float16,0,0.42746400833129883
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,64,8,128,1,float16,fp8,0,0.419374418258667
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,64,8,128,1,fp8,fp8,0,0.4203775882720947
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,64,64,128,1,float16,float16,0,0.48453922271728517
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,64,64,128,1,float16,fp8,0,0.4733071804046631
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,64,1,128,1,float16,float16,0,0.19235680103302003
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,64,64,128,1,fp8,fp8,0,0.475545597076416
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,64,1,128,1,float16,fp8,0,0.19118080139160157
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,64,1,128,1,fp8,fp8,0,0.19197119474411012
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,64,2,128,1,float16,float16,0,0.19818079471588135
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,64,2,128,1,float16,fp8,0,0.19737119674682618
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,64,2,128,1,fp8,fp8,0,0.19610400199890138
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,64,4,128,1,float16,float16,0,0.20570240020751954
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,64,4,128,1,float16,fp8,0,0.20394721031188964
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,64,4,128,1,fp8,fp8,0,0.20515201091766358
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,64,8,128,1,float16,float16,0,0.22601759433746338
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,64,8,128,1,float16,fp8,0,0.22260639667510987
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,64,8,128,1,fp8,fp8,0,0.2223423957824707
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,64,64,128,1,float16,float16,0,0.2530287981033325
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,64,64,128,1,float16,fp8,0,0.24758241176605225
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,64,64,128,1,fp8,fp8,0,0.24755520820617677
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,64,1,128,1,float16,float16,0,0.10846560001373291
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,64,1,128,1,float16,fp8,0,0.10861760377883911
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,64,1,128,1,fp8,fp8,0,0.10821280479431153
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,64,2,128,1,float16,float16,0,0.10914080142974854
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,64,2,128,1,float16,fp8,0,0.10864160060882569
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,64,2,128,1,fp8,fp8,0,0.10897279977798462
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,64,4,128,1,float16,float16,0,0.11481759548187256
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,64,4,128,1,float16,fp8,0,0.11436640024185181
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,64,4,128,1,fp8,fp8,0,0.11500639915466308
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,64,8,128,1,float16,float16,0,0.12337280511856079
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,64,8,128,1,float16,fp8,0,0.12228480577468873
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,64,8,128,1,fp8,fp8,0,0.12196799516677856
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,64,64,128,1,float16,float16,0,0.1368672013282776
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,64,64,128,1,float16,fp8,0,0.13420319557189941
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,64,64,128,1,fp8,fp8,0,0.13470079898834228
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,64,1,128,1,float16,float16,0,0.05910720229148865
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,64,1,128,1,float16,fp8,0,0.05979679822921753
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,64,1,128,1,fp8,fp8,0,0.05982239842414856
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,64,2,128,1,float16,float16,0,0.06070240139961243
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,64,2,128,1,float16,fp8,0,0.060171198844909665
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,64,2,128,1,fp8,fp8,0,0.05999360084533691
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,64,4,128,1,float16,float16,0,0.06328319907188415
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,64,4,128,1,float16,fp8,0,0.06060799956321716
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,64,4,128,1,fp8,fp8,0,0.06175839900970459
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,64,8,128,1,float16,float16,0,0.07028639912605286
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,64,8,128,1,float16,fp8,0,0.06627839803695679
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,64,8,128,1,fp8,fp8,0,0.06692000031471253
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,64,1,128,1,float16,float16,0,3.911724853515625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,64,1,128,1,float16,fp8,0,3.9471790313720705
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,64,2,128,1,float16,float16,0,4.006300735473633
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,64,1,128,1,fp8,fp8,0,3.9371055603027343
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,64,2,128,1,float16,fp8,0,4.108369445800781
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,64,2,128,1,fp8,fp8,0,4.081043243408203
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,64,4,128,1,float16,float16,0,4.314555358886719
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,64,4,128,1,float16,fp8,0,4.36822738647461
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,64,1,128,1,float16,float16,0,1.934119987487793
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,64,4,128,1,fp8,fp8,0,4.375196838378907
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,64,8,128,1,float16,float16,0,4.900961685180664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,64,8,128,1,float16,fp8,0,4.972071838378906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,64,8,128,1,fp8,fp8,0,4.9331615447998045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,64,1,128,1,float16,fp8,0,1.997879981994629
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,64,1,128,1,fp8,fp8,0,1.9860143661499023
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,64,64,128,1,float16,float16,0,6.496920013427735
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,64,64,128,1,float16,fp8,0,6.516758728027344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,64,2,128,1,float16,float16,0,2.005940818786621
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,64,64,128,1,fp8,fp8,0,6.558220672607422
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,64,2,128,1,float16,fp8,0,2.0681535720825197
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,64,2,128,1,fp8,fp8,0,2.0501455307006835
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,64,4,128,1,float16,float16,0,2.1690303802490236
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,64,4,128,1,float16,fp8,0,2.201684761047363
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,64,4,128,1,fp8,fp8,0,2.1947376251220705
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,64,8,128,1,float16,float16,0,2.462454414367676
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,64,1,128,1,float16,float16,0,0.9799839973449707
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,64,8,128,1,float16,fp8,0,2.4851903915405273
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,64,8,128,1,fp8,fp8,0,2.4912368774414064
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,64,1,128,1,float16,fp8,0,1.0069215774536133
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,64,64,128,1,float16,float16,0,3.273567962646484
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,64,2,128,1,float16,float16,0,1.0093600273132324
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,64,1,128,1,fp8,fp8,0,1.0075167655944823
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,64,64,128,1,float16,fp8,0,3.2703521728515623
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,64,2,128,1,float16,fp8,0,1.0391903877258302
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,64,64,128,1,fp8,fp8,0,3.291107177734375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,64,2,128,1,fp8,fp8,0,1.043398380279541
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,64,4,128,1,float16,float16,0,1.0954959869384766
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,64,4,128,1,float16,fp8,0,1.1064255714416504
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,64,4,128,1,fp8,fp8,0,1.1002608299255372
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,64,8,128,1,float16,float16,0,1.2441231727600097
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,64,8,128,1,float16,fp8,0,1.24824800491333
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,64,8,128,1,fp8,fp8,0,1.25075044631958
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,64,1,128,1,float16,float16,0,0.5010655879974365
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,64,1,128,1,float16,fp8,0,0.5101103782653809
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,64,1,128,1,fp8,fp8,0,0.5139279842376709
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,64,64,128,1,float16,fp8,0,1.6490655899047852
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,64,64,128,1,float16,float16,0,1.6409727096557618
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,64,2,128,1,float16,float16,0,0.5204432010650635
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,64,64,128,1,fp8,fp8,0,1.6481855392456055
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,64,4,128,1,float16,fp8,0,0.5657216072082519
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,64,2,128,1,float16,fp8,0,0.5324351787567139
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,64,2,128,1,fp8,fp8,0,0.5312672138214112
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,64,8,128,1,float16,float16,0,0.6322976112365722
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,64,4,128,1,float16,float16,0,0.5535103797912597
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,64,4,128,1,fp8,fp8,0,0.5636367797851562
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,64,8,128,1,float16,fp8,0,0.6362095832824707
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,64,64,128,1,float16,fp8,0,0.8379631996154785
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,64,8,128,1,fp8,fp8,0,0.6359727859497071
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,64,64,128,1,float16,float16,0,0.8361248016357422
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,64,1,128,1,float16,float16,0,0.2636735916137695
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,64,1,128,1,float16,fp8,0,0.26797440052032473
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,64,64,128,1,fp8,fp8,0,0.8360832214355469
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,64,1,128,1,fp8,fp8,0,0.26829440593719484
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,64,2,128,1,float16,float16,0,0.2722559928894043
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,64,2,128,1,float16,fp8,0,0.27748000621795654
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,64,2,128,1,fp8,fp8,0,0.27647199630737307
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,64,4,128,1,float16,float16,0,0.29058239459991453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,64,4,128,1,float16,fp8,0,0.29327199459075926
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,64,4,128,1,fp8,fp8,0,0.2947312116622925
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,64,8,128,1,float16,float16,0,0.32554240226745607
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,64,8,128,1,float16,fp8,0,0.3291615962982178
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,64,8,128,1,fp8,fp8,0,0.32975680828094484
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,64,64,128,1,float16,float16,0,0.43104000091552735
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,64,64,128,1,float16,fp8,0,0.43064160346984864
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,64,1,128,1,float16,float16,0,0.14203360080718994
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,64,64,128,1,fp8,fp8,0,0.4305103778839111
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,64,1,128,1,float16,fp8,0,0.14445760250091552
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,64,1,128,1,fp8,fp8,0,0.14506560564041138
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,64,2,128,1,float16,float16,0,0.14836959838867186
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,64,2,128,1,float16,fp8,0,0.15118080377578735
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,64,2,128,1,fp8,fp8,0,0.15064159631729127
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,64,4,128,1,float16,float16,0,0.15606720447540284
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,64,4,128,1,float16,fp8,0,0.15809600353240966
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,64,4,128,1,fp8,fp8,0,0.15773919820785523
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,64,64,128,1,fp8,fp8,0,0.22633121013641358
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,64,8,128,1,float16,float16,0,0.1760543942451477
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,64,8,128,1,float16,fp8,0,0.17656160593032838
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,64,1,128,1,fp8,fp8,0,0.08473119735717774
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,64,8,128,1,fp8,fp8,0,0.17664480209350586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,64,64,128,1,float16,float16,0,0.22775518894195557
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,64,64,128,1,float16,fp8,0,0.2261120080947876
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,64,1,128,1,float16,float16,0,0.08238720297813415
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,64,4,128,1,float16,fp8,0,0.09093120098114013
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,64,1,128,1,float16,fp8,0,0.0847760021686554
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,64,2,128,1,float16,float16,0,0.08368800282478332
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,64,8,128,1,float16,fp8,0,0.09851999878883362
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,64,2,128,1,float16,fp8,0,0.08508480191230774
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,64,2,128,1,fp8,fp8,0,0.08513439893722534
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,64,4,128,1,float16,float16,0,0.08975039720535279
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,64,4,128,1,fp8,fp8,0,0.09105600118637085
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,64,8,128,1,float16,float16,0,0.09829760193824769
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,64,8,128,1,fp8,fp8,0,0.09917280077934265
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,64,64,128,1,float16,float16,0,0.12551679611206054
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,64,64,128,1,float16,fp8,0,0.1257151961326599
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,64,64,128,1,fp8,fp8,0,0.1260543942451477
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,64,1,128,1,float16,float16,0,0.046881601214408875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,64,1,128,1,float16,fp8,0,0.04806880056858063
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,64,1,128,1,fp8,fp8,0,0.04858080148696899
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,64,2,128,1,float16,float16,0,0.04753600060939789
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,64,2,128,1,float16,fp8,0,0.048020800948143004
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,64,2,128,1,fp8,fp8,0,0.04855200052261353
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,64,4,128,1,float16,float16,0,0.04923200011253357
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,64,4,128,1,float16,fp8,0,0.049377599358558656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,64,4,128,1,fp8,fp8,0,0.049244800209999086
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,64,8,128,1,float16,float16,0,0.05680000185966492
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,64,8,128,1,float16,fp8,0,0.05523040294647217
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,64,8,128,1,fp8,fp8,0,0.0542639970779419
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,64,64,128,1,float16,float16,0,0.0728048026561737
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,64,64,128,1,float16,fp8,0,0.06707040071487427
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,64,64,128,1,fp8,fp8,0,0.0669871985912323
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,64,1,128,1,float16,float16,0,0.032419198751449586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,64,1,128,1,float16,fp8,0,0.03391200006008148
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,64,1,128,1,fp8,fp8,0,0.03414719998836517
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,64,2,128,1,float16,float16,0,0.032318401336669925
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,64,2,128,1,float16,fp8,0,0.033913600444793704
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,64,2,128,1,fp8,fp8,0,0.033904001116752625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,64,4,128,1,float16,float16,0,0.03270559906959534
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,64,4,128,1,float16,fp8,0,0.03433600068092346
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,64,4,128,1,fp8,fp8,0,0.03431040048599243
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,64,8,128,1,float16,float16,0,0.03373599946498871
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,64,8,128,1,float16,fp8,0,0.03524639904499054
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,64,8,128,1,fp8,fp8,0,0.034974399209022525
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,64,1,128,1,float16,float16,0,1.5286815643310547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,64,1,128,1,float16,fp8,0,1.6285423278808593
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,64,1,128,1,fp8,fp8,0,1.6311775207519532
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,64,2,128,1,float16,float16,0,1.6197616577148437
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,64,2,128,1,float16,fp8,0,1.6976352691650392
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,64,2,128,1,fp8,fp8,0,1.695756721496582
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,64,4,128,1,float16,float16,0,1.7594079971313477
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,64,4,128,1,float16,fp8,0,1.8356496810913085
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,64,4,128,1,fp8,fp8,0,1.8287776947021483
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,64,8,128,1,float16,float16,0,2.0582176208496095
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,64,1,128,1,float16,float16,0,0.7788432121276856
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,64,8,128,1,float16,fp8,0,2.12357120513916
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,64,1,128,1,float16,fp8,0,0.8224543571472168
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,64,8,128,1,fp8,fp8,0,2.130072021484375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,64,1,128,1,fp8,fp8,0,0.826411247253418
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,64,2,128,1,float16,float16,0,0.8162848472595214
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,64,64,128,1,float16,float16,0,3.0827232360839845
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,64,2,128,1,fp8,fp8,0,0.8556367874145507
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,64,2,128,1,float16,fp8,0,0.8621983528137207
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,64,64,128,1,float16,fp8,0,3.0490720748901365
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,64,64,128,1,fp8,fp8,0,3.048784065246582
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,64,4,128,1,float16,float16,0,0.8923328399658204
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,64,4,128,1,float16,fp8,0,0.9284095764160156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,64,4,128,1,fp8,fp8,0,0.9295104026794434
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,64,8,128,1,float16,float16,0,1.0443663597106934
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,64,8,128,1,float16,fp8,0,1.0665648460388184
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,64,8,128,1,fp8,fp8,0,1.0709343910217286
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,64,1,128,1,float16,float16,0,0.40253920555114747
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,64,64,128,1,float16,float16,0,1.555782413482666
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,64,1,128,1,fp8,fp8,0,0.42298078536987305
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,64,1,128,1,float16,fp8,0,0.42696800231933596
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,64,2,128,1,float16,float16,0,0.4210207939147949
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,64,64,128,1,float16,fp8,0,1.5382415771484375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,64,64,128,1,fp8,fp8,0,1.5359487533569336
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,64,2,128,1,float16,fp8,0,0.4420720100402832
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,64,4,128,1,float16,fp8,0,0.4767871856689453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,64,2,128,1,fp8,fp8,0,0.4432079792022705
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,64,4,128,1,float16,float16,0,0.45688958168029786
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,64,4,128,1,fp8,fp8,0,0.477953577041626
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,64,8,128,1,float16,float16,0,0.5327616214752198
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,64,8,128,1,float16,fp8,0,0.5460159778594971
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,64,8,128,1,fp8,fp8,0,0.5450032234191895
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,64,1,128,1,float16,float16,0,0.21522080898284912
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,64,64,128,1,float16,float16,0,0.7917247772216797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,64,64,128,1,float16,fp8,0,0.7799551963806153
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,64,64,128,1,fp8,fp8,0,0.7801328182220459
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,64,1,128,1,float16,fp8,0,0.2254256010055542
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,64,1,128,1,fp8,fp8,0,0.2255728006362915
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,64,2,128,1,float16,float16,0,0.22159841060638427
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,64,2,128,1,float16,fp8,0,0.23319680690765382
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,64,2,128,1,fp8,fp8,0,0.23263518810272216
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,64,4,128,1,float16,float16,0,0.24041280746459961
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,64,4,128,1,float16,fp8,0,0.24858241081237792
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,64,4,128,1,fp8,fp8,0,0.2505743980407715
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,64,8,128,1,float16,float16,0,0.27758240699768066
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,64,8,128,1,float16,fp8,0,0.2839296102523804
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,64,1,128,1,float16,float16,0,0.11749440431594849
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,64,8,128,1,fp8,fp8,0,0.28458240032196047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,64,64,128,1,float16,float16,0,0.4094480037689209
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,64,64,128,1,float16,fp8,0,0.40236802101135255
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,64,64,128,1,fp8,fp8,0,0.4013232231140137
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,64,1,128,1,float16,fp8,0,0.12302559614181519
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,64,1,128,1,fp8,fp8,0,0.12252000570297242
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,64,2,128,1,float16,float16,0,0.12367680072784423
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,64,2,128,1,float16,fp8,0,0.12842080593109131
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,64,2,128,1,fp8,fp8,0,0.12836639881134032
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,64,4,128,1,float16,float16,0,0.13126399517059326
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,64,4,128,1,float16,fp8,0,0.13552160263061525
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,64,4,128,1,fp8,fp8,0,0.13595199584960938
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,64,8,128,1,float16,fp8,0,0.15343040227890015
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,64,8,128,1,float16,float16,0,0.1508128046989441
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,64,8,128,1,fp8,fp8,0,0.1535904049873352
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,64,64,128,1,float16,float16,0,0.21692159175872802
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,64,64,128,1,float16,fp8,0,0.21251521110534669
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,64,1,128,1,float16,float16,0,0.07154560089111328
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,64,64,128,1,fp8,fp8,0,0.21287360191345214
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,64,1,128,1,float16,fp8,0,0.07362560033798218
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,64,1,128,1,fp8,fp8,0,0.07355999946594238
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,64,2,128,1,float16,float16,0,0.07148799896240235
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,64,2,128,1,float16,fp8,0,0.07446079850196838
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,64,2,128,1,fp8,fp8,0,0.07445759773254394
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,64,4,128,1,float16,float16,0,0.07813119888305664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,64,4,128,1,float16,fp8,0,0.08036479949951172
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,64,4,128,1,fp8,fp8,0,0.07994080185890198
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,64,8,128,1,float16,float16,0,0.0861024022102356
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,64,8,128,1,float16,fp8,0,0.08767359852790832
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,64,8,128,1,fp8,fp8,0,0.08762879967689514
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,64,64,128,1,float16,float16,0,0.12042880058288574
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,64,64,128,1,float16,fp8,0,0.11770399808883666
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,64,64,128,1,fp8,fp8,0,0.11764800548553467
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,64,1,128,1,float16,float16,0,0.04193440079689026
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,64,1,128,1,float16,fp8,0,0.04257920086383819
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,64,1,128,1,fp8,fp8,0,0.04265759885311127
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,64,2,128,1,float16,float16,0,0.04235199987888336
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,64,2,128,1,float16,fp8,0,0.042982399463653564
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,64,2,128,1,fp8,fp8,0,0.04316479861736298
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,64,4,128,1,float16,float16,0,0.04343520104885101
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,64,4,128,1,float16,fp8,0,0.04396480023860931
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,64,4,128,1,fp8,fp8,0,0.04384799897670746
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,64,8,128,1,float16,float16,0,0.05103840231895447
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,64,8,128,1,float16,fp8,0,0.04920800030231476
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,64,8,128,1,fp8,fp8,0,0.04948799908161163
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,64,64,128,1,float16,float16,0,0.06937440037727356
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,64,64,128,1,float16,fp8,0,0.061887997388839724
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,64,64,128,1,fp8,fp8,0,0.06111840009689331
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,64,1,128,1,float16,float16,0,0.029198399186134337
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,64,1,128,1,float16,fp8,0,0.030788800120353697
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,64,1,128,1,fp8,fp8,0,0.030609598755836485
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,64,2,128,1,float16,float16,0,0.02915999889373779
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,64,2,128,1,float16,fp8,0,0.030579200387001036
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,64,2,128,1,fp8,fp8,0,0.030660799145698546
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,64,4,128,1,float16,float16,0,0.029713600873947144
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,64,4,128,1,float16,fp8,0,0.0308351993560791
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,64,4,128,1,fp8,fp8,0,0.030820798873901368
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,64,8,128,1,float16,float16,0,0.030665600299835206
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,64,8,128,1,float16,fp8,0,0.03171359896659851
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,64,1,128,1,fp8,fp8,0,0.024480000138282776
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,64,8,128,1,fp8,fp8,0,0.03185279965400696
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,64,64,128,1,float16,float16,0,0.036462399363517764
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,64,64,128,1,float16,fp8,0,0.03726080060005188
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,64,64,128,1,fp8,fp8,0,0.037201601266860965
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,64,1,128,1,float16,float16,0,0.023028799891471864
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,64,1,128,1,float16,fp8,0,0.02396959960460663
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,64,2,128,1,float16,float16,0,0.023582400381565095
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,64,2,128,1,float16,fp8,0,0.024358400702476503
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,64,2,128,1,fp8,fp8,0,0.024225600063800812
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,64,4,128,1,float16,float16,0,0.023497599363327026
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,64,4,128,1,float16,fp8,0,0.02436479926109314
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,64,4,128,1,fp8,fp8,0,0.024323199689388276
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,64,8,128,1,float16,float16,0,0.023897600173950196
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,64,8,128,1,float16,fp8,0,0.024639999866485594
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,64,8,128,1,fp8,fp8,0,0.02470560073852539
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,64,1,128,1,float16,float16,0,0.7219759941101074
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,64,1,128,1,float16,fp8,0,0.777020788192749
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,64,1,128,1,fp8,fp8,0,0.7764880180358886
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,64,2,128,1,float16,float16,0,0.7467135906219482
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,64,2,128,1,float16,fp8,0,0.8067104339599609
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,64,2,128,1,fp8,fp8,0,0.8118063926696777
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,64,4,128,1,float16,float16,0,0.8183135986328125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,64,4,128,1,float16,fp8,0,0.8877615928649902
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,64,4,128,1,fp8,fp8,0,0.881169605255127
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,64,8,128,1,float16,float16,0,0.9643983840942383
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,64,8,128,1,float16,fp8,0,1.033956813812256
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,64,8,128,1,fp8,fp8,0,1.028934383392334
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,64,1,128,1,float16,float16,0,0.36872479915618894
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,64,1,128,1,float16,fp8,0,0.39929120540618895
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,64,64,128,1,float16,float16,0,1.5195119857788086
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,64,1,128,1,fp8,fp8,0,0.3998271942138672
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,64,2,128,1,float16,float16,0,0.385809588432312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,64,64,128,1,float16,fp8,0,1.5282416343688965
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,64,64,128,1,fp8,fp8,0,1.52925443649292
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,64,2,128,1,float16,fp8,0,0.4164735794067383
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,64,2,128,1,fp8,fp8,0,0.41665921211242674
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,64,4,128,1,float16,float16,0,0.42093920707702637
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,64,4,128,1,float16,fp8,0,0.45274720191955564
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,64,4,128,1,fp8,fp8,0,0.45086240768432617
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,64,8,128,1,float16,float16,0,0.49112801551818847
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,64,8,128,1,float16,fp8,0,0.5276336193084716
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,64,8,128,1,fp8,fp8,0,0.5253119945526123
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,64,64,128,1,float16,float16,0,0.7726672172546387
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,64,1,128,1,float16,float16,0,0.1952175974845886
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,64,1,128,1,float16,fp8,0,0.21064319610595703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,64,64,128,1,float16,fp8,0,0.7766335964202881
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,64,1,128,1,fp8,fp8,0,0.2097615957260132
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,64,64,128,1,fp8,fp8,0,0.7763311862945557
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,64,2,128,1,float16,float16,0,0.20523519515991212
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,64,2,128,1,float16,fp8,0,0.21891040802001954
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,64,2,128,1,fp8,fp8,0,0.219268798828125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,64,8,128,1,float16,fp8,0,0.2767024040222168
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,64,4,128,1,float16,float16,0,0.22258079051971436
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,64,4,128,1,float16,fp8,0,0.2365664005279541
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,64,4,128,1,fp8,fp8,0,0.237390398979187
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,64,8,128,1,float16,float16,0,0.256057596206665
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,64,8,128,1,fp8,fp8,0,0.27537438869476316
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,64,64,128,1,float16,float16,0,0.4000415802001953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,64,64,128,1,float16,fp8,0,0.3992975950241089
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,64,1,128,1,float16,float16,0,0.10802719593048096
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,64,64,128,1,fp8,fp8,0,0.3992000102996826
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,64,1,128,1,float16,fp8,0,0.11720000505447388
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,64,1,128,1,fp8,fp8,0,0.11760319471359253
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,64,2,128,1,float16,float16,0,0.11294080018997192
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,64,2,128,1,float16,fp8,0,0.12399840354919434
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,64,2,128,1,fp8,fp8,0,0.1227903962135315
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,64,4,128,1,float16,float16,0,0.12223520278930664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,64,4,128,1,float16,fp8,0,0.13039679527282716
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,64,4,128,1,fp8,fp8,0,0.1309872031211853
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,64,8,128,1,float16,float16,0,0.1406559944152832
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,64,8,128,1,float16,fp8,0,0.14905279874801636
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,64,8,128,1,fp8,fp8,0,0.14888800382614137
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,64,64,128,1,float16,float16,0,0.21292479038238527
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,64,64,128,1,float16,fp8,0,0.20820798873901367
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,64,64,128,1,fp8,fp8,0,0.20807199478149413
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,64,1,128,1,float16,float16,0,0.06737440228462219
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,64,1,128,1,float16,fp8,0,0.07018079757690429
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,64,1,128,1,fp8,fp8,0,0.07002239823341369
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,64,2,128,1,float16,float16,0,0.06775199770927429
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,64,2,128,1,float16,fp8,0,0.07007200121879578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,64,2,128,1,fp8,fp8,0,0.0702127993106842
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,64,4,128,1,float16,float16,0,0.07319200038909912
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,64,4,128,1,float16,fp8,0,0.07638400197029113
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,64,4,128,1,fp8,fp8,0,0.07614399790763855
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,64,8,128,1,float16,float16,0,0.08170719742774964
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,64,8,128,1,float16,fp8,0,0.08418400287628174
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,64,8,128,1,fp8,fp8,0,0.08398240208625793
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,64,64,128,1,float16,float16,0,0.11761280298233032
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,64,64,128,1,float16,fp8,0,0.11267039775848389
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,64,64,128,1,fp8,fp8,0,0.11259360313415527
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,64,1,128,1,float16,float16,0,0.037868800759315493
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,64,1,128,1,float16,fp8,0,0.03946399986743927
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,64,1,128,1,fp8,fp8,0,0.03973119854927063
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,64,2,128,1,float16,float16,0,0.03958080112934113
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,64,2,128,1,float16,fp8,0,0.040140798687934874
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,64,2,128,1,fp8,fp8,0,0.039603200554847715
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,64,4,128,1,float16,float16,0,0.04108160138130188
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,64,4,128,1,float16,fp8,0,0.04030239880084992
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,64,4,128,1,fp8,fp8,0,0.040761598944664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,64,8,128,1,float16,float16,0,0.04711360037326813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,64,8,128,1,float16,fp8,0,0.04583039879798889
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,64,8,128,1,fp8,fp8,0,0.04596000015735626
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,64,64,128,1,float16,float16,0,0.0669103980064392
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,64,64,128,1,float16,fp8,0,0.0576416015625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,64,64,128,1,fp8,fp8,0,0.056897598505020144
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,64,1,128,1,float16,float16,0,0.027478399872779845
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,64,1,128,1,float16,fp8,0,0.02881760001182556
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,64,1,128,1,fp8,fp8,0,0.028932800889015196
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,64,2,128,1,float16,float16,0,0.027643200755119324
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,64,2,128,1,float16,fp8,0,0.02871519923210144
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,64,2,128,1,fp8,fp8,0,0.028782400488853454
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,64,4,128,1,float16,float16,0,0.028038400411605834
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,64,4,128,1,float16,fp8,0,0.029003199934959412
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,64,4,128,1,fp8,fp8,0,0.028809601068496705
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,64,8,128,1,float16,float16,0,0.029168000817298888
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,64,8,128,1,float16,fp8,0,0.029783999919891356
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,64,8,128,1,fp8,fp8,0,0.029956799745559693
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,64,64,128,1,float16,float16,0,0.03527520000934601
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,64,64,128,1,float16,fp8,0,0.035488000512123107
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,64,64,128,1,fp8,fp8,0,0.03568480014801025
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,64,1,128,1,float16,float16,0,0.021488000452518464
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,64,1,128,1,float16,fp8,0,0.022574399411678315
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,64,1,128,1,fp8,fp8,0,0.022491200268268584
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,64,2,128,1,float16,float16,0,0.02178719937801361
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,64,2,128,1,float16,fp8,0,0.022728000581264497
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,64,2,128,1,fp8,fp8,0,0.022759999334812164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,64,4,128,1,float16,float16,0,0.02221599966287613
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,64,4,128,1,float16,fp8,0,0.022524799406528472
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,64,4,128,1,fp8,fp8,0,0.02282080054283142
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,64,8,128,1,float16,float16,0,0.022427199780941008
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,64,8,128,1,float16,fp8,0,0.02298399955034256
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,64,8,128,1,fp8,fp8,0,0.023020799458026885
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,64,64,128,1,float16,float16,0,0.026822400093078614
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,64,64,128,1,float16,fp8,0,0.02731359899044037
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,64,64,128,1,fp8,fp8,0,0.027107200026512145
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,64,1,128,1,float16,float16,0,0.020535999536514284
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,64,1,128,1,float16,fp8,0,0.021425600349903106
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,64,1,128,1,fp8,fp8,0,0.021617600321769716
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,64,2,128,1,float16,float16,0,0.020875200629234314
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,64,2,128,1,float16,fp8,0,0.021488000452518464
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,64,2,128,1,fp8,fp8,0,0.021558399498462676
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,64,4,128,1,float16,float16,0,0.02098879963159561
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,64,4,128,1,float16,fp8,0,0.02176959961652756
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,64,4,128,1,fp8,fp8,0,0.02173759937286377
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,64,8,128,1,float16,float16,0,0.021121600270271303
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,64,8,128,1,float16,fp8,0,0.021823999285697938
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,64,8,128,1,fp8,fp8,0,0.022009600698947907
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,64,1,128,1,float16,float16,0,0.37097759246826173
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,64,1,128,1,float16,fp8,0,0.40217437744140627
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,64,1,128,1,fp8,fp8,0,0.40033278465270994
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,64,2,128,1,float16,float16,0,0.3835007905960083
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,64,2,128,1,float16,fp8,0,0.4184864044189453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,64,2,128,1,fp8,fp8,0,0.41872639656066896
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,64,4,128,1,float16,float16,0,0.42014079093933104
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,64,4,128,1,float16,fp8,0,0.4511119842529297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,64,4,128,1,fp8,fp8,0,0.44976320266723635
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,64,8,128,1,float16,float16,0,0.4955599784851074
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,64,8,128,1,float16,fp8,0,0.5258063793182373
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,64,1,128,1,float16,fp8,0,0.21003520488739014
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,64,8,128,1,fp8,fp8,0,0.5250319957733154
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,64,64,128,1,float16,float16,0,0.8901935577392578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,64,1,128,1,float16,float16,0,0.19781440496444702
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,64,2,128,1,fp8,fp8,0,0.21853439807891845
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,64,64,128,1,float16,fp8,0,0.9132479667663574
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,64,64,128,1,fp8,fp8,0,0.911673641204834
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,64,1,128,1,fp8,fp8,0,0.21364641189575195
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,64,2,128,1,float16,float16,0,0.2039344072341919
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,64,2,128,1,float16,fp8,0,0.21835680007934571
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,64,4,128,1,float16,float16,0,0.2238384008407593
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,64,4,128,1,float16,fp8,0,0.23676960468292235
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,64,4,128,1,fp8,fp8,0,0.23685760498046876
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,64,8,128,1,float16,float16,0,0.2592639923095703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,64,8,128,1,float16,fp8,0,0.2719487905502319
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,64,8,128,1,fp8,fp8,0,0.27399520874023436
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,64,64,128,1,float16,float16,0,0.45650558471679686
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,64,64,128,1,float16,fp8,0,0.4693151950836182
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,64,1,128,1,float16,float16,0,0.10758719444274903
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,64,64,128,1,fp8,fp8,0,0.46939520835876464
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,64,1,128,1,float16,fp8,0,0.11774400472640992
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,64,1,128,1,fp8,fp8,0,0.11811679601669312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,64,2,128,1,float16,float16,0,0.11361119747161866
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,64,2,128,1,float16,fp8,0,0.12388960123062134
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,64,2,128,1,fp8,fp8,0,0.12371519804000855
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,64,4,128,1,float16,float16,0,0.12244319915771484
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,64,4,128,1,float16,fp8,0,0.13164960145950316
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,64,4,128,1,fp8,fp8,0,0.1314479947090149
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,64,8,128,1,float16,float16,0,0.1397968053817749
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,64,8,128,1,float16,fp8,0,0.15016000270843505
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,64,8,128,1,fp8,fp8,0,0.14928640127182008
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,64,64,128,1,float16,float16,0,0.24147040843963624
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,64,64,128,1,float16,fp8,0,0.24375998973846436
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,64,64,128,1,fp8,fp8,0,0.24435679912567138
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,64,1,128,1,float16,float16,0,0.06786400079727173
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,64,1,128,1,float16,fp8,0,0.07066239714622498
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,64,1,128,1,fp8,fp8,0,0.06971840262413025
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,64,2,128,1,float16,float16,0,0.06847360134124755
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,64,2,128,1,float16,fp8,0,0.07113440036773681
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,64,2,128,1,fp8,fp8,0,0.07086719870567322
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,64,4,128,1,float16,float16,0,0.07352960109710693
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,64,64,128,1,float16,float16,0,0.13097280263900757
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,64,4,128,1,float16,fp8,0,0.07676960229873657
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,64,4,128,1,fp8,fp8,0,0.0763487994670868
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,64,8,128,1,float16,float16,0,0.0823248028755188
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,64,8,128,1,float16,fp8,0,0.08438879847526551
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,64,8,128,1,fp8,fp8,0,0.08463039994239807
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,64,64,128,1,float16,fp8,0,0.1306175947189331
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,64,64,128,1,fp8,fp8,0,0.1305184006690979
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,64,1,128,1,float16,float16,0,0.037411201000213626
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,64,1,128,1,float16,fp8,0,0.039083200693130496
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,64,1,128,1,fp8,fp8,0,0.039345601201057435
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,64,2,128,1,float16,float16,0,0.03842720091342926
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,64,2,128,1,float16,fp8,0,0.03954879939556122
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,64,2,128,1,fp8,fp8,0,0.03945760130882263
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,64,4,128,1,float16,float16,0,0.04047519862651825
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,64,4,128,1,float16,fp8,0,0.04051679968833923
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,64,4,128,1,fp8,fp8,0,0.040443199872970584
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,64,8,128,1,float16,float16,0,0.04758560061454773
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,64,64,128,1,fp8,fp8,0,0.0661408007144928
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,64,8,128,1,float16,fp8,0,0.04679040014743805
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,64,8,128,1,fp8,fp8,0,0.04527359902858734
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,64,64,128,1,float16,float16,0,0.07435680031776429
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,64,64,128,1,float16,fp8,0,0.06638879776000976
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,64,1,128,1,float16,float16,0,0.02741119861602783
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,64,1,128,1,float16,fp8,0,0.028987199068069458
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,64,1,128,1,fp8,fp8,0,0.02876800000667572
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,64,2,128,1,float16,float16,0,0.02754240036010742
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,64,2,128,1,float16,fp8,0,0.028630399703979494
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,64,2,128,1,fp8,fp8,0,0.02895520031452179
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,64,8,128,1,float16,fp8,0,0.03017440140247345
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,64,4,128,1,float16,float16,0,0.02783840000629425
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,64,4,128,1,float16,fp8,0,0.02928000092506409
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,64,4,128,1,fp8,fp8,0,0.029257598519325256
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,64,8,128,1,float16,float16,0,0.028747200965881348
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,64,8,128,1,fp8,fp8,0,0.029767999053001405
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,64,64,128,1,float16,float16,0,0.03824959993362427
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,64,64,128,1,float16,fp8,0,0.039540800452232364
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,64,64,128,1,fp8,fp8,0,0.039534398913383485
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,64,1,128,1,float16,float16,0,0.021539199352264404
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,64,1,128,1,float16,fp8,0,0.022592000663280487
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,64,1,128,1,fp8,fp8,0,0.022457599639892578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,64,2,128,1,float16,float16,0,0.021748800575733186
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,64,2,128,1,float16,fp8,0,0.02250719964504242
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,64,2,128,1,fp8,fp8,0,0.022731199860572815
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,64,4,128,1,float16,float16,0,0.021831999719142913
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,64,4,128,1,float16,fp8,0,0.022588799893856048
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,64,4,128,1,fp8,fp8,0,0.022793599963188173
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,64,8,128,1,float16,float16,0,0.022302399575710296
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,64,8,128,1,float16,fp8,0,0.022977599501609804
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,64,8,128,1,fp8,fp8,0,0.022932800650596618
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,64,64,128,1,float16,float16,0,0.026332798600196838
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,64,64,128,1,float16,fp8,0,0.02709600031375885
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,64,64,128,1,fp8,fp8,0,0.027184000611305235
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,64,1,128,1,float16,float16,0,0.020183999836444855
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,64,1,128,1,float16,fp8,0,0.021635200083255767
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,64,1,128,1,fp8,fp8,0,0.021433599293231964
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,64,2,128,1,float16,float16,0,0.02067199945449829
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,64,2,128,1,float16,fp8,0,0.021352000534534454
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,64,2,128,1,fp8,fp8,0,0.021547199785709382
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,64,4,128,1,float16,float16,0,0.021003200113773345
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,64,4,128,1,float16,fp8,0,0.02151840031147003
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,64,4,128,1,fp8,fp8,0,0.021536000072956085
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,64,8,128,1,float16,float16,0,0.02088160067796707
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,64,8,128,1,float16,fp8,0,0.02181279957294464
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,64,8,128,1,fp8,fp8,0,0.021691200137138367
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,64,64,128,1,float16,float16,0,0.021593600511550903
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,64,64,128,1,float16,fp8,0,0.022593599557876588
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,64,64,128,1,fp8,fp8,0,0.022742399573326112
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,64,1,128,1,float16,float16,0,0.019897599518299103
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,64,1,128,1,float16,fp8,0,0.020951999723911284
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,64,1,128,1,fp8,fp8,0,0.02080959975719452
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,64,2,128,1,float16,float16,0,0.020268799364566804
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,64,2,128,1,float16,fp8,0,0.020819200575351714
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,64,2,128,1,fp8,fp8,0,0.02095839977264404
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,64,4,128,1,float16,float16,0,0.020283199846744537
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,64,4,128,1,float16,fp8,0,0.021367999911308288
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,64,4,128,1,fp8,fp8,0,0.02115039974451065
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,64,8,128,1,float16,float16,0,0.020499199628829956
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,64,8,128,1,float16,fp8,0,0.021243199706077576
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,64,8,128,1,fp8,fp8,0,0.021675199270248413
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,64,1,128,1,float16,float16,0,0.1967743992805481
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,64,1,128,1,float16,fp8,0,0.21468639373779297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,64,1,128,1,fp8,fp8,0,0.21474080085754393
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,64,2,128,1,float16,float16,0,0.20451838970184327
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,64,2,128,1,float16,fp8,0,0.21992158889770508
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,64,2,128,1,fp8,fp8,0,0.21921279430389404
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,64,4,128,1,float16,float16,0,0.2215343952178955
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,64,4,128,1,float16,fp8,0,0.2360896110534668
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,64,4,128,1,fp8,fp8,0,0.23595840930938722
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,64,8,128,1,float16,float16,0,0.258188796043396
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,64,8,128,1,float16,fp8,0,0.2753007888793945
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,64,8,128,1,fp8,fp8,0,0.2752432107925415
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,64,64,128,1,float16,float16,0,0.5760863780975342
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,64,64,128,1,float16,fp8,0,0.6083600044250488
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,64,1,128,1,float16,float16,0,0.10733439922332763
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,64,64,128,1,fp8,fp8,0,0.6075119972229004
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,64,1,128,1,float16,fp8,0,0.11785279512405396
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,64,1,128,1,fp8,fp8,0,0.11918879747390747
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,64,2,128,1,float16,float16,0,0.11369119882583618
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,64,8,128,1,float16,float16,0,0.13926080465316773
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,64,2,128,1,float16,fp8,0,0.12397279739379882
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,64,2,128,1,fp8,fp8,0,0.12381919622421264
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,64,4,128,1,float16,float16,0,0.1215440034866333
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,64,4,128,1,float16,fp8,0,0.1310927987098694
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,64,4,128,1,fp8,fp8,0,0.13181439638137818
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,64,8,128,1,float16,fp8,0,0.1504639983177185
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,64,8,128,1,fp8,fp8,0,0.15039360523223877
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,64,64,128,1,float16,float16,0,0.30066399574279784
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,64,64,128,1,float16,fp8,0,0.31631999015808104
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,64,1,128,1,float16,float16,0,0.06808800101280213
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,64,64,128,1,fp8,fp8,0,0.31554560661315917
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,64,1,128,1,float16,fp8,0,0.07091360092163086
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,64,1,128,1,fp8,fp8,0,0.07053920030593872
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,64,2,128,1,float16,float16,0,0.06886240243911743
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,64,2,128,1,float16,fp8,0,0.07186880111694335
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,64,2,128,1,fp8,fp8,0,0.0712768018245697
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,64,4,128,1,float16,float16,0,0.07460479736328125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,64,4,128,1,float16,fp8,0,0.07645120024681092
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,64,4,128,1,fp8,fp8,0,0.07720159888267517
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,64,8,128,1,float16,float16,0,0.08294559717178344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,64,8,128,1,float16,fp8,0,0.08551679849624634
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,64,8,128,1,fp8,fp8,0,0.0853215992450714
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,64,64,128,1,float16,float16,0,0.16139520406723024
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,64,64,128,1,float16,fp8,0,0.1687999963760376
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,64,64,128,1,fp8,fp8,0,0.16828479766845703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,64,1,128,1,float16,float16,0,0.03759360015392303
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,64,1,128,1,float16,fp8,0,0.03962720036506653
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,64,1,128,1,fp8,fp8,0,0.03952800035476685
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,64,2,128,1,float16,float16,0,0.03808479905128479
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,64,2,128,1,float16,fp8,0,0.039977601170539855
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,64,2,128,1,fp8,fp8,0,0.03969280123710632
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,64,4,128,1,float16,float16,0,0.039868798851966855
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,64,4,128,1,float16,fp8,0,0.04096960127353668
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,64,4,128,1,fp8,fp8,0,0.04085760116577149
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,64,8,128,1,float16,float16,0,0.047367998957633974
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,64,8,128,1,float16,fp8,0,0.04543839991092682
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,64,8,128,1,fp8,fp8,0,0.045956799387931825
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,64,64,128,1,float16,float16,0,0.0891103982925415
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,64,2,128,1,float16,float16,0,0.027374398708343507
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,64,64,128,1,float16,fp8,0,0.08429759740829468
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,64,64,128,1,fp8,fp8,0,0.08462719917297364
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,64,1,128,1,float16,float16,0,0.02760159969329834
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,64,1,128,1,float16,fp8,0,0.028700798749923706
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,64,1,128,1,fp8,fp8,0,0.02890079915523529
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,64,2,128,1,float16,fp8,0,0.028747200965881348
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,64,2,128,1,fp8,fp8,0,0.028951999545097352
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,64,4,128,1,float16,float16,0,0.027459201216697694
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,64,4,128,1,float16,fp8,0,0.02876160144805908
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,64,4,128,1,fp8,fp8,0,0.029019200801849367
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,64,8,128,1,float16,float16,0,0.028652799129486085
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,64,8,128,1,float16,fp8,0,0.030160000920295714
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,64,8,128,1,fp8,fp8,0,0.030302399396896364
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,64,64,128,1,float16,float16,0,0.04635519981384277
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,64,64,128,1,float16,fp8,0,0.04880639910697937
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,64,64,128,1,fp8,fp8,0,0.04853599965572357
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,64,1,128,1,float16,float16,0,0.02176959961652756
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,64,1,128,1,float16,fp8,0,0.022702400386333466
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,64,1,128,1,fp8,fp8,0,0.022912000119686127
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,64,2,128,1,float16,float16,0,0.021823999285697938
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,64,2,128,1,float16,fp8,0,0.022961600124835967
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,64,2,128,1,fp8,fp8,0,0.02292799949645996
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,64,4,128,1,float16,float16,0,0.021910400688648225
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,64,4,128,1,float16,fp8,0,0.02255840003490448
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,64,4,128,1,fp8,fp8,0,0.022779199481010436
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,64,8,128,1,float16,float16,0,0.022259199619293214
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,64,8,128,1,float16,fp8,0,0.023231999576091768
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,64,8,128,1,fp8,fp8,0,0.022951999306678773
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,64,64,128,1,float16,float16,0,0.03009440004825592
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,64,64,128,1,float16,fp8,0,0.03147520124912262
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,64,64,128,1,fp8,fp8,0,0.03158079981803894
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,64,1,128,1,float16,float16,0,0.020628799498081208
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,64,1,128,1,float16,fp8,0,0.021214400231838227
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,64,1,128,1,fp8,fp8,0,0.021612800657749176
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,64,2,128,1,float16,float16,0,0.020601600408554077
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,64,2,128,1,float16,fp8,0,0.02130880057811737
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,64,2,128,1,fp8,fp8,0,0.021612800657749176
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,64,4,128,1,float16,float16,0,0.02089280039072037
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,64,4,128,1,float16,fp8,0,0.021694399416446686
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,64,4,128,1,fp8,fp8,0,0.021620799601078034
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,64,64,128,1,fp8,fp8,0,0.022705599665641785
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,64,8,128,1,float16,float16,0,0.02077919989824295
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,64,8,128,1,float16,fp8,0,0.02189439982175827
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,64,8,128,1,fp8,fp8,0,0.021508799493312837
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,64,64,128,1,float16,float16,0,0.021780799329280853
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,64,64,128,1,float16,fp8,0,0.022487999498844148
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,64,1,128,1,float16,float16,0,0.0200655996799469
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,64,1,128,1,float16,fp8,0,0.021035200357437132
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,64,1,128,1,fp8,fp8,0,0.02107519954442978
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,64,2,128,1,float16,float16,0,0.02028000056743622
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,64,2,128,1,float16,fp8,0,0.021014399826526642
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,64,2,128,1,fp8,fp8,0,0.02115360051393509
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,64,4,128,1,float16,float16,0,0.02025440037250519
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,64,4,128,1,float16,fp8,0,0.021087999641895293
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,64,4,128,1,fp8,fp8,0,0.021268799901008606
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,64,8,128,1,float16,float16,0,0.02080159932374954
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,64,8,128,1,float16,fp8,0,0.0217631995677948
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,64,8,128,1,fp8,fp8,0,0.021246400475502015
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,64,64,128,1,float16,float16,0,0.02094399929046631
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,64,64,128,1,float16,fp8,0,0.021400000154972076
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,64,64,128,1,fp8,fp8,0,0.021561600267887115
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,64,1,128,1,float16,float16,0,0.019681599736213685
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,64,1,128,1,float16,fp8,0,0.02054239958524704
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,64,1,128,1,fp8,fp8,0,0.020572799444198608
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,64,2,128,1,float16,float16,0,0.019972799718379973
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,64,2,128,1,float16,fp8,0,0.02051520049571991
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,64,2,128,1,fp8,fp8,0,0.02078240066766739
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,64,4,128,1,float16,float16,0,0.019985599815845488
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,64,4,128,1,float16,fp8,0,0.020476800203323365
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,64,4,128,1,fp8,fp8,0,0.020692799985408784
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,64,8,128,1,float16,float16,0,0.020230400562286376
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,64,8,128,1,float16,fp8,0,0.02096800059080124
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,64,8,128,1,fp8,fp8,0,0.02086720019578934
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,64,1,128,1,float16,float16,0,0.10817760229110718
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,64,1,128,1,float16,fp8,0,0.11918079853057861
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,64,1,128,1,fp8,fp8,0,0.11938240528106689
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,64,2,128,1,float16,float16,0,0.11520960330963134
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,64,2,128,1,float16,fp8,0,0.12475199699401855
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,64,2,128,1,fp8,fp8,0,0.125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,64,4,128,1,float16,float16,0,0.12254400253295898
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,64,4,128,1,float16,fp8,0,0.13206559419631958
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,64,4,128,1,fp8,fp8,0,0.13233760595321656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,64,8,128,1,float16,float16,0,0.14145439863204956
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,64,8,128,1,float16,fp8,0,0.15075680017471313
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,64,8,128,1,fp8,fp8,0,0.15050400495529176
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,64,64,128,1,float16,float16,0,0.4249904155731201
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,64,1,128,1,float16,float16,0,0.06924319863319398
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,64,64,128,1,float16,fp8,0,0.4651648044586182
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,64,2,128,1,float16,fp8,0,0.07221760153770447
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,64,64,128,1,fp8,fp8,0,0.463270378112793
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,64,1,128,1,float16,fp8,0,0.07179679870605468
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,64,1,128,1,fp8,fp8,0,0.07208639979362488
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,64,2,128,1,float16,float16,0,0.06908640265464783
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,64,8,128,1,float16,float16,0,0.08341119885444641
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,64,2,128,1,fp8,fp8,0,0.07275360226631164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,64,4,128,1,float16,float16,0,0.07547839879989623
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,64,4,128,1,float16,fp8,0,0.07827200293540955
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,64,4,128,1,fp8,fp8,0,0.07810559868812561
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,64,8,128,1,float16,fp8,0,0.08623999953269959
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,64,8,128,1,fp8,fp8,0,0.08608800172805786
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,64,64,128,1,float16,float16,0,0.22394719123840331
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,64,64,128,1,float16,fp8,0,0.245467209815979
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,64,64,128,1,fp8,fp8,0,0.24471039772033693
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,64,1,128,1,float16,float16,0,0.03795520067214966
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,64,1,128,1,float16,fp8,0,0.03982079923152924
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,64,1,128,1,fp8,fp8,0,0.039654400944709775
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,64,2,128,1,float16,float16,0,0.03850879967212677
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,64,2,128,1,float16,fp8,0,0.04043039977550507
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,64,2,128,1,fp8,fp8,0,0.04016000032424927
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,64,4,128,1,float16,float16,0,0.039603200554847715
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,64,4,128,1,float16,fp8,0,0.04104160070419312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,64,4,128,1,fp8,fp8,0,0.04151839911937714
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,64,8,128,1,float16,float16,0,0.047156798839569095
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,64,8,128,1,float16,fp8,0,0.04608800113201141
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,64,8,128,1,fp8,fp8,0,0.04581120014190674
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,64,64,128,1,float16,float16,0,0.11954720020294189
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,64,64,128,1,float16,fp8,0,0.12215679883956909
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,64,64,128,1,fp8,fp8,0,0.12221440076828002
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,64,1,128,1,float16,float16,0,0.027758398652076723
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,64,1,128,1,float16,fp8,0,0.02900159955024719
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,64,1,128,1,fp8,fp8,0,0.029032000899314882
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,64,2,128,1,float16,float16,0,0.02749919891357422
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,64,2,128,1,float16,fp8,0,0.029371199011802674
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,64,2,128,1,fp8,fp8,0,0.029283198714256286
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,64,4,128,1,float16,float16,0,0.028191998600959778
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,64,4,128,1,float16,fp8,0,0.029392001032829285
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,64,4,128,1,fp8,fp8,0,0.029361599683761598
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,64,8,128,1,float16,float16,0,0.028908801078796387
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,64,8,128,1,float16,fp8,0,0.030251199007034303
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,64,8,128,1,fp8,fp8,0,0.030184000730514526
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,64,64,128,1,float16,float16,0,0.06151679754257202
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,64,64,128,1,float16,fp8,0,0.06721439957618713
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,64,64,128,1,fp8,fp8,0,0.06718559861183167
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,64,1,128,1,float16,float16,0,0.021844799816608428
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,64,1,128,1,float16,fp8,0,0.022646400332450866
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,64,1,128,1,fp8,fp8,0,0.02258239984512329
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,64,2,128,1,float16,float16,0,0.022176000475883483
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,64,2,128,1,float16,fp8,0,0.022603200376033784
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,64,2,128,1,fp8,fp8,0,0.022782400250434875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,64,4,128,1,float16,float16,0,0.02221119999885559
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,64,4,128,1,float16,fp8,0,0.022892799973487855
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,64,4,128,1,fp8,fp8,0,0.022961600124835967
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,64,8,128,1,float16,float16,0,0.022198399901390074
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,64,8,128,1,float16,fp8,0,0.023161600530147552
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,64,8,128,1,fp8,fp8,0,0.022988800704479218
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,64,64,128,1,float16,float16,0,0.03745439946651459
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,64,64,128,1,float16,fp8,0,0.04095999896526337
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,64,64,128,1,fp8,fp8,0,0.040729600191116336
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,64,1,128,1,float16,float16,0,0.020641599595546723
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,64,1,128,1,float16,fp8,0,0.021550400555133818
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,64,1,128,1,fp8,fp8,0,0.021704000234603883
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,64,2,128,1,float16,float16,0,0.020632000267505647
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,64,2,128,1,float16,fp8,0,0.02152640074491501
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,64,2,128,1,fp8,fp8,0,0.02136320024728775
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,64,4,128,1,float16,float16,0,0.02090719938278198
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,64,4,128,1,float16,fp8,0,0.021550400555133818
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,64,4,128,1,fp8,fp8,0,0.02162880003452301
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,64,8,128,1,float16,float16,0,0.020870399475097657
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,64,8,128,1,float16,fp8,0,0.021835200488567352
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,64,8,128,1,fp8,fp8,0,0.0216511994600296
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,64,64,128,1,float16,float16,0,0.02571359872817993
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,64,64,128,1,float16,fp8,0,0.026859200000762938
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,64,64,128,1,fp8,fp8,0,0.027030399441719054
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,64,1,128,1,float16,float16,0,0.019784000515937806
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,64,1,128,1,float16,fp8,0,0.020694400370121
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,64,1,128,1,fp8,fp8,0,0.02090719938278198
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,64,2,128,1,float16,float16,0,0.020380799472332
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,64,2,128,1,float16,fp8,0,0.020934399962425233
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,64,2,128,1,fp8,fp8,0,0.02107200026512146
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,64,4,128,1,float16,float16,0,0.02019679993391037
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,64,4,128,1,float16,fp8,0,0.021254399418830873
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,64,4,128,1,fp8,fp8,0,0.020894399285316466
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,64,8,128,1,float16,float16,0,0.02059040069580078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,64,8,128,1,float16,fp8,0,0.02138720005750656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,64,8,128,1,fp8,fp8,0,0.021264000236988066
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,64,64,128,1,float16,float16,0,0.02088640034198761
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,64,64,128,1,float16,fp8,0,0.021652799844741822
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,64,64,128,1,fp8,fp8,0,0.02160319983959198
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,64,1,128,1,float16,float16,0,0.01972000002861023
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,64,1,128,1,float16,fp8,0,0.020310400426387785
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,64,1,128,1,fp8,fp8,0,0.020470400154590607
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,64,2,128,1,float16,float16,0,0.019971199333667755
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,64,2,128,1,float16,fp8,0,0.020558400452136992
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,64,2,128,1,fp8,fp8,0,0.02038400024175644
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,64,4,128,1,float16,float16,0,0.019867199659347533
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,64,4,128,1,float16,fp8,0,0.020612800121307374
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,64,4,128,1,fp8,fp8,0,0.020763200521469117
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,64,8,128,1,float16,float16,0,0.020241600275039674
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,64,8,128,1,float16,fp8,0,0.02099200040102005
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,64,8,128,1,fp8,fp8,0,0.020902399718761445
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,64,64,128,1,float16,float16,0,0.020179200172424316
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,64,64,128,1,float16,fp8,0,0.021196800470352172
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,64,64,128,1,fp8,fp8,0,0.021264000236988066
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,64,1,128,1,float16,float16,0,0.01951040029525757
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,64,1,128,1,float16,fp8,0,0.020587199926376344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,64,1,128,1,fp8,fp8,0,0.020377600193023683
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,64,2,128,1,float16,float16,0,0.019622400403022766
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,64,2,128,1,float16,fp8,0,0.02033279985189438
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,64,2,128,1,fp8,fp8,0,0.02017119973897934
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,64,4,128,1,float16,float16,0,0.019564799964427948
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,64,4,128,1,float16,fp8,0,0.020286400616168977
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,64,4,128,1,fp8,fp8,0,0.02028000056743622
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,64,8,128,1,float16,float16,0,0.019752000272274018
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,64,8,128,1,float16,fp8,0,0.020526400208473204
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,64,8,128,1,fp8,fp8,0,0.02054080069065094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,64,1,128,1,float16,float16,0,0.03014880120754242
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,64,1,128,1,float16,fp8,0,0.031543999910354614
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,64,1,128,1,fp8,fp8,0,0.03190560042858124
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,64,2,128,1,float16,float16,0,0.03715200126171112
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,64,2,128,1,float16,fp8,0,0.040699198842048645
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,64,2,128,1,fp8,fp8,0,0.040889599919319154
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,64,4,128,1,float16,float16,0,0.05228480100631714
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,64,4,128,1,float16,fp8,0,0.058329600095748904
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,64,4,128,1,fp8,fp8,0,0.05846560001373291
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,64,8,128,1,float16,float16,0,0.08003519773483277
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,64,8,128,1,float16,fp8,0,0.09404000043869018
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,64,8,128,1,fp8,fp8,0,0.09422879815101623
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,64,64,128,1,float16,float16,0,0.2515631914138794
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,64,64,128,1,float16,fp8,0,0.3130543947219849
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,64,1,128,1,float16,float16,0,0.02284799963235855
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,64,64,128,1,fp8,fp8,0,0.3111759901046753
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,64,1,128,1,float16,fp8,0,0.023820799589157105
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,64,1,128,1,fp8,fp8,0,0.023563200235366823
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,64,2,128,1,float16,float16,0,0.0263808012008667
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,64,2,128,1,float16,fp8,0,0.028110399842262268
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,64,2,128,1,fp8,fp8,0,0.028217598795890808
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,64,4,128,1,float16,float16,0,0.034358400106430056
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,64,4,128,1,float16,fp8,0,0.03749119937419891
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,64,4,128,1,fp8,fp8,0,0.03752639889717102
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,64,8,128,1,float16,float16,0,0.04873920083045959
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,64,8,128,1,float16,fp8,0,0.05492640137672424
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,64,8,128,1,fp8,fp8,0,0.05511040091514587
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,64,64,128,1,float16,float16,0,0.13571360111236572
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,64,64,128,1,float16,fp8,0,0.16428159475326537
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,64,64,128,1,fp8,fp8,0,0.16524159908294678
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,64,1,128,1,float16,float16,0,0.021060800552368163
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,64,1,128,1,float16,fp8,0,0.022123199701309205
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,64,1,128,1,fp8,fp8,0,0.021902400255203246
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,64,2,128,1,float16,float16,0,0.021372799575328828
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,64,2,128,1,float16,fp8,0,0.022099199891090392
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,64,2,128,1,fp8,fp8,0,0.022265599668025972
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,64,4,128,1,float16,float16,0,0.025310400128364562
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,64,4,128,1,float16,fp8,0,0.026979199051856993
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,64,4,128,1,fp8,fp8,0,0.02677280008792877
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,64,8,128,1,float16,float16,0,0.032913601398468016
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,64,8,128,1,float16,fp8,0,0.036160001158714296
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,64,8,128,1,fp8,fp8,0,0.03576320111751556
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,64,64,128,1,float16,float16,0,0.07779039740562439
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,64,64,128,1,float16,fp8,0,0.09063519835472107
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,64,64,128,1,fp8,fp8,0,0.09043999910354614
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,64,1,128,1,float16,float16,0,0.02006240040063858
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,64,1,128,1,float16,fp8,0,0.021040000021457672
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,64,1,128,1,fp8,fp8,0,0.02094399929046631
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,64,2,128,1,float16,float16,0,0.020534400641918183
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,64,2,128,1,float16,fp8,0,0.021065600216388702
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,64,2,128,1,fp8,fp8,0,0.021180799603462218
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,64,4,128,1,float16,float16,0,0.02040479928255081
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,64,4,128,1,float16,fp8,0,0.021721599996089934
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,64,4,128,1,fp8,fp8,0,0.021848000586032867
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,64,8,128,1,float16,float16,0,0.024583999812602998
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,64,8,128,1,float16,fp8,0,0.0262688010931015
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,64,8,128,1,fp8,fp8,0,0.026919999718666078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,64,64,128,1,float16,float16,0,0.04664640128612518
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,64,64,128,1,float16,fp8,0,0.053390401601791385
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,64,64,128,1,fp8,fp8,0,0.0533407986164093
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,64,1,128,1,float16,float16,0,0.01934880018234253
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,64,1,128,1,float16,fp8,0,0.020326399803161622
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,64,1,128,1,fp8,fp8,0,0.02025440037250519
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,64,2,128,1,float16,float16,0,0.01956000030040741
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,64,2,128,1,float16,fp8,0,0.020360000431537628
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,64,2,128,1,fp8,fp8,0,0.02003840059041977
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,64,4,128,1,float16,float16,0,0.019729599356651306
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,64,4,128,1,float16,fp8,0,0.02046239972114563
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,64,4,128,1,fp8,fp8,0,0.020695999264717102
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,64,8,128,1,float16,float16,0,0.020164799690246583
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,64,8,128,1,float16,fp8,0,0.021081599593162536
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,64,8,128,1,fp8,fp8,0,0.020897600054740905
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,64,64,128,1,float16,float16,0,0.031542399525642396
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,64,64,128,1,float16,fp8,0,0.034995201230049136
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,64,64,128,1,fp8,fp8,0,0.034974399209022525
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,64,1,128,1,float16,float16,0,0.019068799912929535
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,64,1,128,1,float16,fp8,0,0.020075200498104094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,64,1,128,1,fp8,fp8,0,0.01979999989271164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,64,2,128,1,float16,float16,0,0.01940480023622513
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,64,2,128,1,float16,fp8,0,0.019763199985027312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,64,2,128,1,fp8,fp8,0,0.02007199972867966
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,64,4,128,1,float16,float16,0,0.01940480023622513
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,64,4,128,1,float16,fp8,0,0.020347200334072113
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,64,4,128,1,fp8,fp8,0,0.020238399505615234
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,64,8,128,1,float16,float16,0,0.019628800451755524
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,64,8,128,1,float16,fp8,0,0.020638400316238405
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,64,8,128,1,fp8,fp8,0,0.020278400182723998
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,64,64,128,1,float16,float16,0,0.023889599740505217
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,64,64,128,1,float16,fp8,0,0.025415998697280884
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,64,64,128,1,fp8,fp8,0,0.025419199466705324
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,64,1,128,1,float16,float16,0,0.019079999625682832
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,64,1,128,1,float16,fp8,0,0.019840000569820403
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,64,1,128,1,fp8,fp8,0,0.019707199931144715
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,64,2,128,1,float16,float16,0,0.01884160041809082
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,64,2,128,1,float16,fp8,0,0.019809600710868836
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,64,2,128,1,fp8,fp8,0,0.019857600331306458
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,64,4,128,1,float16,float16,0,0.019131200015544893
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,64,4,128,1,float16,fp8,0,0.020084799826145174
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,64,4,128,1,fp8,fp8,0,0.01984640061855316
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,64,8,128,1,float16,float16,0,0.019446399807929993
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,64,8,128,1,float16,fp8,0,0.020051200687885285
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,64,8,128,1,fp8,fp8,0,0.020175999402999877
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,64,64,128,1,float16,fp8,0,0.02098720073699951
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,64,64,128,1,fp8,fp8,0,0.020927999913692475
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,64,64,128,1,float16,float16,0,0.019862399995326997
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,64,1,128,1,float16,float16,0,0.016950400173664094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,64,1,128,1,float16,fp8,0,0.018087999522686006
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,64,1,128,1,fp8,fp8,0,0.01796319931745529
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,64,2,128,1,float16,float16,0,0.018716800212860107
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,64,2,128,1,float16,fp8,0,0.019867199659347533
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,64,2,128,1,fp8,fp8,0,0.019912000000476836
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,64,4,128,1,float16,float16,0,0.018801599740982056
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,64,4,128,1,float16,fp8,0,0.019857600331306458
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,64,4,128,1,fp8,fp8,0,0.019857600331306458
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,64,8,128,1,float16,float16,0,0.019166399538517
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,64,8,128,1,float16,fp8,0,0.02017119973897934
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,64,8,128,1,fp8,fp8,0,0.019684800505638124
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,64,64,128,1,float16,float16,0,0.01929759979248047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,64,64,128,1,float16,fp8,0,0.02056799978017807
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,64,64,128,1,fp8,fp8,0,0.020390400290489198
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,64,1,128,1,float16,float16,0,0.016203199326992036
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,64,1,128,1,float16,fp8,0,0.01688639968633652
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,64,1,128,1,fp8,fp8,0,0.0173007994890213
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,64,2,128,1,float16,float16,0,0.01736160069704056
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,64,2,128,1,float16,fp8,0,0.017726400494575502
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,64,2,128,1,fp8,fp8,0,0.017924800515174866
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,64,4,128,1,float16,float16,0,0.01894560009241104
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,64,4,128,1,float16,fp8,0,0.019483199715614317
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,64,4,128,1,fp8,fp8,0,0.019449600577354433
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,64,8,128,1,float16,float16,0,0.018859200179576874
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,64,8,128,1,float16,fp8,0,0.01969120055437088
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,64,8,128,1,fp8,fp8,0,0.019648000597953796
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,48,1,128,1,float16,fp8,0,38.958074951171874
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,48,1,128,1,fp8,fp8,0,38.88430480957031
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,48,2,128,1,fp8,fp8,0,38.754766845703124
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,48,2,128,1,float16,fp8,0,39.43978271484375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,48,4,128,1,float16,fp8,0,39.20158386230469
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,48,1,128,1,float16,float16,0,46.84447326660156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,48,2,128,1,float16,float16,0,46.973046875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,48,4,128,1,float16,float16,0,47.340179443359375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,48,48,128,1,float16,float16,0,26.299273681640624
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,48,48,128,1,float16,fp8,0,22.63263702392578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,48,48,128,1,fp8,fp8,0,22.393960571289064
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,48,1,128,1,float16,float16,0,22.811236572265624
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,48,4,128,1,fp8,fp8,0,39.467221069335935
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,48,8,128,1,float16,fp8,0,39.73139343261719
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,48,8,128,1,fp8,fp8,0,40.466339111328125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,48,8,128,1,float16,float16,0,47.7765380859375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,48,1,128,1,float16,fp8,0,19.45146942138672
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,48,1,128,1,fp8,fp8,0,19.96973419189453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,48,2,128,1,float16,fp8,0,19.649436950683594
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,48,2,128,1,float16,float16,0,23.966168212890626
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,48,2,128,1,fp8,fp8,0,19.20434112548828
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,48,4,128,1,float16,fp8,0,19.59107208251953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,48,4,128,1,float16,float16,0,23.014302062988282
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,48,4,128,1,fp8,fp8,0,19.415304565429686
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,48,48,128,1,float16,float16,0,13.1462646484375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,48,48,128,1,float16,fp8,0,11.13946533203125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,48,8,128,1,float16,fp8,0,20.25146484375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,48,8,128,1,float16,float16,0,23.830592346191406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,48,8,128,1,fp8,fp8,0,20.138650512695314
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,48,48,128,1,fp8,fp8,0,11.394123077392578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,48,1,128,1,float16,float16,0,11.62003173828125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,48,1,128,1,float16,fp8,0,9.603585815429687
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,48,1,128,1,fp8,fp8,0,9.63289794921875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,48,2,128,1,float16,fp8,0,9.940888214111329
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,48,2,128,1,fp8,fp8,0,9.609915161132813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,48,2,128,1,float16,float16,0,12.068479919433594
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,48,4,128,1,float16,fp8,0,10.077559661865234
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,48,4,128,1,float16,float16,0,11.959811401367187
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,48,4,128,1,fp8,fp8,0,9.637757110595704
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,48,48,128,1,float16,float16,0,6.438470458984375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,48,48,128,1,float16,fp8,0,5.598259353637696
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,48,1,128,1,float16,float16,0,5.477182388305664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,48,48,128,1,fp8,fp8,0,5.710788726806641
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,48,1,128,1,float16,fp8,0,4.722028732299805
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,48,8,128,1,float16,float16,0,12.35537109375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,48,8,128,1,float16,fp8,0,10.29864959716797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,48,8,128,1,fp8,fp8,0,9.790096282958984
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,48,1,128,1,fp8,fp8,0,4.790164947509766
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,48,2,128,1,float16,float16,0,5.725652694702148
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,48,2,128,1,float16,fp8,0,4.818627166748047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,48,2,128,1,fp8,fp8,0,4.805852890014648
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,48,4,128,1,float16,fp8,0,4.866239929199219
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,48,4,128,1,float16,float16,0,5.699435043334961
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,48,4,128,1,fp8,fp8,0,4.929584121704101
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,48,8,128,1,float16,float16,0,5.764313507080078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,48,8,128,1,float16,fp8,0,4.90483512878418
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,48,8,128,1,fp8,fp8,0,4.981568145751953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,48,1,128,1,float16,fp8,0,22.06640167236328
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,48,1,128,1,fp8,fp8,0,21.953335571289063
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,48,2,128,1,float16,fp8,0,22.672140502929686
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,48,2,128,1,fp8,fp8,0,22.321763610839845
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,48,1,128,1,float16,float16,0,26.331097412109376
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,48,2,128,1,float16,float16,0,26.608575439453126
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,48,4,128,1,float16,fp8,0,22.75752410888672
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,48,4,128,1,float16,float16,0,26.7857177734375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,48,48,128,1,float16,fp8,0,13.246051025390624
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,48,48,128,1,fp8,fp8,0,13.519175720214843
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,48,48,128,1,float16,float16,0,15.647895812988281
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,48,1,128,1,float16,float16,0,13.165170288085937
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,48,4,128,1,fp8,fp8,0,22.819744873046876
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,48,8,128,1,fp8,fp8,0,22.662095642089845
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,48,8,128,1,float16,fp8,0,23.335360717773437
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,48,8,128,1,float16,float16,0,28.186944580078126
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,48,1,128,1,float16,fp8,0,10.805445098876953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,48,1,128,1,fp8,fp8,0,10.992140960693359
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,48,2,128,1,float16,float16,0,12.952456665039062
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,48,2,128,1,float16,fp8,0,10.900606536865235
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,48,2,128,1,fp8,fp8,0,11.072555541992188
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,48,4,128,1,float16,fp8,0,11.313692474365235
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,48,4,128,1,float16,float16,0,13.624757385253906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,48,4,128,1,fp8,fp8,0,11.182367706298828
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,48,48,128,1,float16,float16,0,7.715745544433593
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,48,48,128,1,float16,fp8,0,6.63843002319336
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,48,8,128,1,float16,fp8,0,11.690606689453125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,48,8,128,1,float16,float16,0,13.846296691894532
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,48,48,128,1,fp8,fp8,0,6.721174621582032
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,48,8,128,1,fp8,fp8,0,11.479332733154298
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,48,1,128,1,float16,float16,0,6.423886108398437
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,48,1,128,1,float16,fp8,0,5.454619216918945
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,48,1,128,1,fp8,fp8,0,5.482727813720703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,48,2,128,1,float16,float16,0,6.566627502441406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,48,2,128,1,float16,fp8,0,5.589513778686523
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,48,2,128,1,fp8,fp8,0,5.60063362121582
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,48,4,128,1,float16,fp8,0,5.528324890136719
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,48,4,128,1,float16,float16,0,6.919513702392578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,48,4,128,1,fp8,fp8,0,5.560953521728516
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,48,48,128,1,float16,float16,0,3.933415985107422
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,48,8,128,1,float16,float16,0,6.8555137634277346
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,48,8,128,1,float16,fp8,0,5.632696151733398
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,48,48,128,1,float16,fp8,0,3.343252944946289
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,48,48,128,1,fp8,fp8,0,3.3505455017089845
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,48,8,128,1,fp8,fp8,0,5.765460968017578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,48,1,128,1,float16,fp8,0,2.6920480728149414
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,48,1,128,1,float16,float16,0,3.2309215545654295
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,48,1,128,1,fp8,fp8,0,2.7571664810180665
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,48,2,128,1,float16,fp8,0,2.7280256271362306
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,48,2,128,1,float16,float16,0,3.1695327758789062
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,48,2,128,1,fp8,fp8,0,2.7233184814453124
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,48,4,128,1,float16,fp8,0,2.7835487365722655
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,48,4,128,1,fp8,fp8,0,2.7520240783691405
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,48,4,128,1,float16,float16,0,3.135851287841797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,48,8,128,1,float16,float16,0,3.3455055236816404
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,48,8,128,1,float16,fp8,0,2.8194816589355467
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,48,8,128,1,fp8,fp8,0,2.8028383255004883
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,48,1,128,1,float16,fp8,0,15.1929931640625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,48,1,128,1,fp8,fp8,0,15.524264526367187
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,48,2,128,1,float16,fp8,0,15.381246948242188
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,48,2,128,1,fp8,fp8,0,15.954728698730468
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,48,1,128,1,float16,float16,0,18.855070495605467
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,48,4,128,1,float16,fp8,0,15.585983276367188
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,48,2,128,1,float16,float16,0,18.31884307861328
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,48,4,128,1,float16,float16,0,19.04357147216797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,48,48,128,1,float16,fp8,0,9.655217742919922
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,48,48,128,1,fp8,fp8,0,9.665151977539063
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,48,48,128,1,float16,float16,0,11.249305725097656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,48,1,128,1,float16,float16,0,9.332360076904298
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,48,4,128,1,fp8,fp8,0,15.693653869628907
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,48,8,128,1,float16,fp8,0,16.067811584472658
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,48,8,128,1,fp8,fp8,0,16.34791717529297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,48,8,128,1,float16,float16,0,19.66878662109375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,48,1,128,1,float16,fp8,0,7.811100769042969
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,48,1,128,1,fp8,fp8,0,7.763374328613281
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,48,2,128,1,float16,fp8,0,7.836009979248047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,48,2,128,1,fp8,fp8,0,7.9084114074707035
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,48,2,128,1,float16,float16,0,9.35479965209961
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,48,4,128,1,float16,fp8,0,8.052401733398437
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,48,4,128,1,float16,float16,0,9.624769592285157
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,48,4,128,1,fp8,fp8,0,7.853001403808594
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,48,48,128,1,float16,float16,0,5.720691299438476
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,48,8,128,1,float16,float16,0,9.586260986328124
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,48,8,128,1,float16,fp8,0,8.047815704345703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,48,8,128,1,fp8,fp8,0,7.9319602966308596
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,48,48,128,1,float16,fp8,0,4.863561630249023
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,48,48,128,1,fp8,fp8,0,4.951870346069336
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,48,1,128,1,float16,float16,0,4.426236724853515
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,48,1,128,1,float16,fp8,0,3.8186511993408203
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,48,1,128,1,fp8,fp8,0,3.8778575897216796
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,48,2,128,1,float16,float16,0,4.443371200561524
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,48,2,128,1,float16,fp8,0,3.8690399169921874
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,48,2,128,1,fp8,fp8,0,3.8266849517822266
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,48,4,128,1,float16,fp8,0,3.870169448852539
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,48,4,128,1,float16,float16,0,4.671393585205078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,48,4,128,1,fp8,fp8,0,3.9630096435546873
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,48,8,128,1,float16,fp8,0,3.998155212402344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,48,48,128,1,float16,fp8,0,2.453265571594238
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,48,8,128,1,float16,float16,0,4.5697582244873045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,48,48,128,1,float16,float16,0,2.7491472244262694
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,48,48,128,1,fp8,fp8,0,2.4567071914672853
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,48,1,128,1,float16,float16,0,2.379667282104492
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,48,1,128,1,float16,fp8,0,1.9194223403930664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,48,8,128,1,fp8,fp8,0,4.150339126586914
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,48,1,128,1,fp8,fp8,0,1.9184463500976563
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,48,2,128,1,float16,float16,0,2.097769546508789
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,48,2,128,1,float16,fp8,0,2.002697563171387
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,48,2,128,1,fp8,fp8,0,2.0317615509033202
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,48,4,128,1,float16,float16,0,2.16168327331543
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,48,4,128,1,fp8,fp8,0,1.9455984115600586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,48,4,128,1,float16,fp8,0,2.059129524230957
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,48,8,128,1,float16,float16,0,2.1398527145385744
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,48,8,128,1,float16,fp8,0,2.00020809173584
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,48,8,128,1,fp8,fp8,0,2.1146368026733398
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,48,1,128,1,float16,fp8,0,20.29608612060547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,48,1,128,1,fp8,fp8,0,19.836636352539063
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,48,2,128,1,fp8,fp8,0,19.999235534667967
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,48,2,128,1,float16,fp8,0,20.535536193847655
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,48,4,128,1,float16,fp8,0,20.317829895019532
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,48,1,128,1,float16,float16,0,23.64665985107422
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,48,2,128,1,float16,float16,0,24.126087951660157
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,48,4,128,1,float16,float16,0,24.721287536621094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,48,48,128,1,float16,fp8,0,13.184271240234375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,48,48,128,1,float16,float16,0,15.423374938964844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,48,48,128,1,fp8,fp8,0,13.268553161621094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,48,1,128,1,float16,float16,0,12.193881225585937
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,48,4,128,1,fp8,fp8,0,21.10374755859375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,48,8,128,1,float16,fp8,0,21.435282897949218
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,48,8,128,1,fp8,fp8,0,21.864833068847656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,48,8,128,1,float16,float16,0,25.49236297607422
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,48,1,128,1,float16,fp8,0,9.893180847167969
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,48,1,128,1,fp8,fp8,0,9.92272491455078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,48,2,128,1,float16,fp8,0,10.114867401123046
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,48,2,128,1,float16,float16,0,12.033544158935547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,48,2,128,1,fp8,fp8,0,10.125609588623046
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,48,4,128,1,float16,fp8,0,10.385580444335938
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,48,4,128,1,float16,float16,0,12.075510406494141
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,48,4,128,1,fp8,fp8,0,10.131572723388672
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,48,48,128,1,float16,float16,0,7.582569885253906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,48,48,128,1,float16,fp8,0,6.584496307373047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,48,8,128,1,float16,fp8,0,10.412001800537109
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,48,8,128,1,float16,float16,0,12.447169494628906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,48,8,128,1,fp8,fp8,0,10.527950286865234
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,48,1,128,1,float16,float16,0,5.882972717285156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,48,48,128,1,fp8,fp8,0,6.651859283447266
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,48,1,128,1,float16,fp8,0,4.952518463134766
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,48,1,128,1,fp8,fp8,0,5.074699020385742
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,48,2,128,1,float16,fp8,0,5.001724624633789
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,48,2,128,1,float16,float16,0,5.9433025360107425
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,48,2,128,1,fp8,fp8,0,5.131532669067383
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,48,4,128,1,float16,float16,0,6.119454574584961
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,48,4,128,1,fp8,fp8,0,5.1235198974609375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,48,4,128,1,float16,fp8,0,5.146136093139648
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,48,8,128,1,float16,float16,0,6.017343902587891
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,48,48,128,1,float16,fp8,0,3.3209102630615233
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,48,8,128,1,float16,fp8,0,5.272662353515625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,48,48,128,1,float16,float16,0,3.786054229736328
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,48,48,128,1,fp8,fp8,0,3.510755157470703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,48,8,128,1,fp8,fp8,0,5.214799880981445
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,48,1,128,1,float16,fp8,0,2.480931282043457
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,48,1,128,1,float16,float16,0,2.860366439819336
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,48,1,128,1,fp8,fp8,0,2.595907211303711
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,48,2,128,1,float16,float16,0,2.814249610900879
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,48,2,128,1,float16,fp8,0,2.5338239669799805
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,48,2,128,1,fp8,fp8,0,2.5381023406982424
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,48,4,128,1,float16,fp8,0,2.5375776290893555
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,48,4,128,1,float16,float16,0,2.874017524719238
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,48,4,128,1,fp8,fp8,0,2.5420015335083006
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,48,8,128,1,float16,float16,0,3.081198310852051
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,48,48,128,1,float16,float16,0,1.7943567276000976
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,48,48,128,1,float16,fp8,0,1.6736112594604493
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,48,8,128,1,float16,fp8,0,2.6220991134643556
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,48,48,128,1,fp8,fp8,0,1.6727903366088868
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,48,1,128,1,float16,float16,0,1.4038463592529298
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,48,1,128,1,float16,fp8,0,1.2594256401062012
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,48,8,128,1,fp8,fp8,0,2.78875675201416
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,48,2,128,1,float16,float16,0,1.3604656219482423
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,48,1,128,1,fp8,fp8,0,1.3379183769226075
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,48,2,128,1,float16,fp8,0,1.2600192070007323
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,48,2,128,1,fp8,fp8,0,1.2693296432495118
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,48,4,128,1,float16,float16,0,1.3410479545593261
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,48,4,128,1,float16,fp8,0,1.310318374633789
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,48,4,128,1,fp8,fp8,0,1.2849807739257812
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,48,8,128,1,float16,float16,0,1.4598575592041017
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,48,8,128,1,float16,fp8,0,1.3365743637084961
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,48,8,128,1,fp8,fp8,0,1.3091695785522461
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,48,1,128,1,float16,fp8,0,11.486246490478516
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,48,1,128,1,fp8,fp8,0,11.642620849609376
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,48,2,128,1,float16,fp8,0,11.722430419921874
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,48,2,128,1,fp8,fp8,0,11.824371337890625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,48,1,128,1,float16,float16,0,13.532948303222657
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,48,2,128,1,float16,float16,0,13.716706848144531
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,48,4,128,1,float16,float16,0,14.073622131347657
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,48,4,128,1,float16,fp8,0,11.74993896484375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,48,48,128,1,float16,fp8,0,8.224092864990235
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,48,48,128,1,float16,float16,0,9.089456176757812
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,48,4,128,1,fp8,fp8,0,11.808654022216796
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,48,48,128,1,fp8,fp8,0,8.19543228149414
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,48,8,128,1,float16,fp8,0,12.315817260742188
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,48,8,128,1,fp8,fp8,0,12.476576232910157
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,48,1,128,1,float16,float16,0,6.63299331665039
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,48,8,128,1,float16,float16,0,14.502377319335938
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,48,1,128,1,float16,fp8,0,5.7202495574951175
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,48,1,128,1,fp8,fp8,0,5.7570945739746096
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,48,2,128,1,float16,fp8,0,5.793998336791992
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,48,2,128,1,float16,float16,0,6.79583511352539
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,48,2,128,1,fp8,fp8,0,5.954609680175781
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,48,4,128,1,float16,fp8,0,6.002369689941406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,48,4,128,1,fp8,fp8,0,5.987348937988282
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,48,4,128,1,float16,float16,0,7.2294670104980465
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,48,8,128,1,float16,fp8,0,6.2375232696533205
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,48,48,128,1,float16,float16,0,4.543910217285156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,48,8,128,1,float16,float16,0,7.230783843994141
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,48,48,128,1,float16,fp8,0,4.138916778564453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,48,1,128,1,float16,float16,0,3.4948848724365233
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,48,1,128,1,float16,fp8,0,3.1586511611938475
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,48,8,128,1,fp8,fp8,0,6.28063850402832
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,48,48,128,1,fp8,fp8,0,4.133945465087891
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,48,1,128,1,fp8,fp8,0,2.877724838256836
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,48,2,128,1,float16,fp8,0,2.962380790710449
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,48,2,128,1,fp8,fp8,0,2.911774444580078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,48,2,128,1,float16,float16,0,3.3733470916748045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,48,4,128,1,float16,fp8,0,2.9945648193359373
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,48,4,128,1,fp8,fp8,0,2.9675535202026366
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,48,4,128,1,float16,float16,0,3.495111846923828
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,48,8,128,1,float16,float16,0,3.370515060424805
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,48,48,128,1,float16,float16,0,2.250137519836426
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,48,1,128,1,float16,float16,0,1.5649312019348145
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,48,8,128,1,float16,fp8,0,3.0664575576782225
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,48,1,128,1,float16,fp8,0,1.5222687721252441
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,48,48,128,1,float16,fp8,0,2.2741535186767576
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,48,48,128,1,fp8,fp8,0,2.103505516052246
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,48,1,128,1,fp8,fp8,0,1.4940879821777344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,48,8,128,1,fp8,fp8,0,3.0680912017822264
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,48,2,128,1,float16,float16,0,1.5919631958007812
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,48,2,128,1,float16,fp8,0,1.637169647216797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,48,2,128,1,fp8,fp8,0,1.466327953338623
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,48,4,128,1,float16,float16,0,1.60118408203125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,48,4,128,1,float16,fp8,0,1.4908176422119142
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,48,4,128,1,fp8,fp8,0,1.4920031547546386
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,48,8,128,1,float16,float16,0,1.6248384475708009
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,48,8,128,1,float16,fp8,0,1.5462911605834961
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,48,48,128,1,float16,float16,0,1.1430272102355956
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,48,8,128,1,fp8,fp8,0,1.5981040000915527
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,48,1,128,1,float16,float16,0,0.7627583980560303
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,48,48,128,1,float16,fp8,0,1.0583663940429688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,48,1,128,1,float16,fp8,0,0.7370336055755615
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,48,48,128,1,fp8,fp8,0,1.2049424171447753
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,48,1,128,1,fp8,fp8,0,0.7426432132720947
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,48,2,128,1,float16,float16,0,0.7908815860748291
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,48,2,128,1,float16,fp8,0,0.7539328098297119
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,48,2,128,1,fp8,fp8,0,0.7539423942565918
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,48,4,128,1,float16,float16,0,0.7841423988342285
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,48,4,128,1,float16,fp8,0,0.7640111923217774
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,48,4,128,1,fp8,fp8,0,0.7600368022918701
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,48,8,128,1,float16,float16,0,0.8323760032653809
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,48,8,128,1,float16,fp8,0,0.7899519920349121
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,48,8,128,1,fp8,fp8,0,0.7843488216400146
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,48,1,128,1,float16,fp8,0,10.81270751953125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,48,1,128,1,fp8,fp8,0,10.732061004638672
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,48,2,128,1,float16,fp8,0,11.119617462158203
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,48,2,128,1,fp8,fp8,0,10.977030181884766
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,48,1,128,1,float16,float16,0,12.673783874511718
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,48,2,128,1,float16,float16,0,12.691422271728516
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,48,4,128,1,float16,fp8,0,11.16748504638672
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,48,4,128,1,float16,float16,0,12.97464599609375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,48,1,128,1,float16,float16,0,6.185011291503907
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,48,48,128,1,fp8,fp8,0,8.670804595947265
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,48,48,128,1,float16,fp8,0,8.75693130493164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,48,4,128,1,fp8,fp8,0,11.288854217529297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,48,48,128,1,float16,float16,0,9.54862060546875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,48,8,128,1,float16,fp8,0,11.9380126953125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,48,8,128,1,fp8,fp8,0,11.803907012939453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,48,8,128,1,float16,float16,0,13.515896606445313
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,48,1,128,1,float16,fp8,0,5.4247089385986325
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,48,1,128,1,fp8,fp8,0,5.346396636962891
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,48,2,128,1,float16,fp8,0,5.486751937866211
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,48,2,128,1,float16,float16,0,6.159880065917969
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,48,2,128,1,fp8,fp8,0,5.487118530273437
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,48,4,128,1,float16,fp8,0,5.613735961914062
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,48,4,128,1,fp8,fp8,0,5.630575942993164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,48,4,128,1,float16,float16,0,6.6053619384765625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,48,48,128,1,float16,float16,0,4.705105590820312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,48,1,128,1,float16,float16,0,3.34881591796875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,48,8,128,1,float16,fp8,0,5.891209411621094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,48,8,128,1,float16,float16,0,6.7229248046875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,48,48,128,1,float16,fp8,0,4.385454559326172
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,48,8,128,1,fp8,fp8,0,6.003857421875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,48,48,128,1,fp8,fp8,0,4.378334426879883
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,48,1,128,1,float16,fp8,0,2.69659366607666
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,48,1,128,1,fp8,fp8,0,2.7281471252441407
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,48,2,128,1,float16,fp8,0,2.749750328063965
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,48,2,128,1,float16,float16,0,3.3398975372314452
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,48,2,128,1,fp8,fp8,0,2.81878719329834
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,48,4,128,1,float16,fp8,0,2.8266016006469727
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,48,4,128,1,float16,float16,0,3.138688087463379
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,48,4,128,1,fp8,fp8,0,2.819264030456543
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,48,8,128,1,float16,float16,0,3.295608139038086
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,48,8,128,1,float16,fp8,0,2.949718475341797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,48,1,128,1,float16,float16,0,1.4833200454711915
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,48,1,128,1,float16,fp8,0,1.359881591796875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,48,48,128,1,float16,float16,0,2.4147199630737304
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,48,48,128,1,float16,fp8,0,2.207019233703613
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,48,8,128,1,fp8,fp8,0,2.9634063720703123
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,48,48,128,1,fp8,fp8,0,2.2079183578491213
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,48,1,128,1,fp8,fp8,0,1.4463871955871581
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,48,2,128,1,float16,float16,0,1.4931455612182618
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,48,2,128,1,float16,fp8,0,1.4560832023620605
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,48,2,128,1,fp8,fp8,0,1.382759952545166
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,48,4,128,1,float16,float16,0,1.5220239639282227
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,48,4,128,1,float16,fp8,0,1.4244463920593262
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,48,4,128,1,fp8,fp8,0,1.4223183631896972
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,48,8,128,1,float16,float16,0,1.5645744323730468
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,48,8,128,1,float16,fp8,0,1.5008303642272949
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,48,8,128,1,fp8,fp8,0,1.497377586364746
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,48,48,128,1,float16,float16,0,1.1702896118164063
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,48,48,128,1,float16,fp8,0,1.1117136001586914
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,48,1,128,1,float16,float16,0,0.7340367794036865
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,48,48,128,1,fp8,fp8,0,1.173855972290039
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,48,1,128,1,float16,fp8,0,0.7003407955169678
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,48,1,128,1,fp8,fp8,0,0.7541200160980225
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,48,2,128,1,float16,float16,0,0.7372432231903077
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,48,2,128,1,float16,fp8,0,0.7047552108764649
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,48,2,128,1,fp8,fp8,0,0.7074336051940918
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,48,4,128,1,float16,float16,0,0.7448048114776611
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,48,4,128,1,float16,fp8,0,0.7275311946868896
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,48,4,128,1,fp8,fp8,0,0.7460095882415771
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,48,8,128,1,float16,float16,0,0.7908319950103759
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,48,8,128,1,float16,fp8,0,0.7591887950897217
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,48,8,128,1,fp8,fp8,0,0.7642288208007812
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,48,48,128,1,float16,float16,0,0.5970416069030762
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,48,48,128,1,float16,fp8,0,0.5710239887237549
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,48,48,128,1,fp8,fp8,0,0.5690159797668457
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,48,1,128,1,float16,float16,0,0.37585279941558836
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,48,2,128,1,fp8,fp8,0,0.37023038864135743
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,48,1,128,1,float16,fp8,0,0.36172640323638916
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,48,1,128,1,fp8,fp8,0,0.35759360790252687
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,48,2,128,1,float16,float16,0,0.3845776081085205
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,48,2,128,1,float16,fp8,0,0.37142720222473147
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,48,8,128,1,float16,fp8,0,0.3935247898101807
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,48,4,128,1,float16,float16,0,0.39197280406951907
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,48,4,128,1,float16,fp8,0,0.3746943950653076
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,48,4,128,1,fp8,fp8,0,0.3726367950439453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,48,8,128,1,float16,float16,0,0.40911359786987306
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,48,8,128,1,fp8,fp8,0,0.390233588218689
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,48,1,128,1,float16,fp8,0,6.421011352539063
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,48,1,128,1,fp8,fp8,0,6.433201599121094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,48,1,128,1,float16,float16,0,7.324937438964843
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,48,2,128,1,float16,fp8,0,6.464888000488282
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,48,2,128,1,fp8,fp8,0,6.464289855957031
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,48,2,128,1,float16,float16,0,7.544439697265625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,48,4,128,1,float16,fp8,0,6.701884460449219
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,48,4,128,1,float16,float16,0,7.575540924072266
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,48,1,128,1,float16,float16,0,3.5892097473144533
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,48,4,128,1,fp8,fp8,0,6.6970573425292965
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,48,48,128,1,float16,float16,0,5.9512992858886715
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,48,48,128,1,float16,fp8,0,5.7609294891357425
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,48,48,128,1,fp8,fp8,0,5.745739364624024
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,48,8,128,1,float16,fp8,0,7.193697357177735
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,48,8,128,1,fp8,fp8,0,7.1133583068847654
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,48,8,128,1,float16,float16,0,8.248763275146484
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,48,1,128,1,float16,fp8,0,3.2528385162353515
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,48,1,128,1,fp8,fp8,0,3.1939023971557616
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,48,2,128,1,float16,fp8,0,3.271209716796875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,48,2,128,1,float16,float16,0,3.6145999908447264
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,48,2,128,1,fp8,fp8,0,3.2640911102294923
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,48,4,128,1,float16,float16,0,3.7507183074951174
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,48,4,128,1,float16,fp8,0,3.351220703125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,48,4,128,1,fp8,fp8,0,3.4179664611816407
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,48,8,128,1,float16,float16,0,4.001959991455078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,48,1,128,1,float16,float16,0,1.6776304244995117
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,48,1,128,1,float16,fp8,0,1.6166864395141602
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,48,8,128,1,float16,fp8,0,3.580796813964844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,48,48,128,1,float16,float16,0,3.003102493286133
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,48,48,128,1,float16,fp8,0,2.8831760406494142
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,48,8,128,1,fp8,fp8,0,3.5960865020751953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,48,48,128,1,fp8,fp8,0,2.9323936462402345
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,48,1,128,1,fp8,fp8,0,1.726982307434082
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,48,2,128,1,float16,float16,0,1.7842287063598632
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,48,2,128,1,float16,fp8,0,1.6441072463989257
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,48,2,128,1,fp8,fp8,0,1.6420719146728515
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,48,4,128,1,float16,float16,0,1.785468864440918
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,48,4,128,1,float16,fp8,0,1.7035200119018554
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,48,4,128,1,fp8,fp8,0,1.705860710144043
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,48,8,128,1,float16,float16,0,1.8991872787475585
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,48,8,128,1,float16,fp8,0,1.8675264358520507
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,48,48,128,1,float16,float16,0,1.5134431838989257
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,48,8,128,1,fp8,fp8,0,1.806635284423828
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,48,1,128,1,float16,float16,0,0.8601712226867676
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,48,48,128,1,float16,fp8,0,1.491652774810791
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,48,1,128,1,float16,fp8,0,0.89967041015625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,48,48,128,1,fp8,fp8,0,1.4567407608032226
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,48,1,128,1,fp8,fp8,0,0.8251744270324707
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,48,2,128,1,float16,float16,0,0.8656160354614257
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,48,2,128,1,float16,fp8,0,0.8367744445800781
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,48,2,128,1,fp8,fp8,0,0.8364879608154296
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,48,4,128,1,float16,float16,0,0.8995152473449707
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,48,4,128,1,float16,fp8,0,0.884823989868164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,48,4,128,1,fp8,fp8,0,0.8674415588378906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,48,8,128,1,float16,float16,0,0.94136962890625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,48,8,128,1,float16,fp8,0,0.9278800010681152
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,48,8,128,1,fp8,fp8,0,0.9170991897583007
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,48,48,128,1,float16,float16,0,0.7720687866210938
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,48,48,128,1,float16,fp8,0,0.7397039890289306
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,48,1,128,1,float16,float16,0,0.4384624004364014
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,48,48,128,1,fp8,fp8,0,0.7438560009002686
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,48,1,128,1,float16,fp8,0,0.43347678184509275
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,48,1,128,1,fp8,fp8,0,0.42263522148132326
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,48,2,128,1,float16,float16,0,0.44561119079589845
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,48,2,128,1,float16,fp8,0,0.43250079154968263
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,48,2,128,1,fp8,fp8,0,0.42955842018127444
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,48,4,128,1,float16,float16,0,0.4598720073699951
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,48,4,128,1,float16,fp8,0,0.44569921493530273
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,48,4,128,1,fp8,fp8,0,0.44752001762390137
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,48,48,128,1,float16,fp8,0,0.380841588973999
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,48,8,128,1,float16,float16,0,0.48642559051513673
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,48,8,128,1,float16,fp8,0,0.46996798515319826
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,48,8,128,1,fp8,fp8,0,0.46773438453674315
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,48,48,128,1,float16,float16,0,0.40343360900878905
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,48,48,128,1,fp8,fp8,0,0.38000481128692626
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,48,1,128,1,float16,float16,0,0.23190081119537354
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,48,1,128,1,float16,fp8,0,0.22703359127044678
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,48,1,128,1,fp8,fp8,0,0.2257551908493042
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,48,2,128,1,float16,float16,0,0.23441760540008544
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,48,2,128,1,float16,fp8,0,0.22544960975646972
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,48,2,128,1,fp8,fp8,0,0.22558081150054932
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,48,4,128,1,float16,float16,0,0.24188320636749266
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,48,4,128,1,float16,fp8,0,0.23348159790039064
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,48,4,128,1,fp8,fp8,0,0.23362400531768798
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,48,8,128,1,float16,float16,0,0.254584002494812
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,48,8,128,1,float16,fp8,0,0.24793119430541993
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,48,8,128,1,fp8,fp8,0,0.2502863883972168
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,48,1,128,1,float16,fp8,0,6.280220794677734
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,48,1,128,1,fp8,fp8,0,6.26426887512207
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,48,1,128,1,float16,float16,0,7.0887916564941404
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,48,2,128,1,float16,fp8,0,6.454036712646484
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,48,2,128,1,fp8,fp8,0,6.454064178466797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,48,2,128,1,float16,float16,0,7.1299278259277346
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,48,4,128,1,float16,fp8,0,6.708278656005859
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,48,4,128,1,float16,float16,0,7.401974487304687
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,48,1,128,1,float16,float16,0,3.430894470214844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,48,4,128,1,fp8,fp8,0,6.752969360351562
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,48,48,128,1,float16,float16,0,6.653018951416016
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,48,48,128,1,float16,fp8,0,6.524811553955078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,48,8,128,1,float16,fp8,0,7.368041229248047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,48,8,128,1,float16,float16,0,8.072328186035156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,48,8,128,1,fp8,fp8,0,7.2952018737792965
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,48,48,128,1,fp8,fp8,0,6.485379028320312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,48,1,128,1,float16,fp8,0,3.211592102050781
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,48,1,128,1,fp8,fp8,0,3.1576511383056642
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,48,2,128,1,float16,fp8,0,3.24139518737793
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,48,2,128,1,float16,float16,0,3.4047374725341797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,48,2,128,1,fp8,fp8,0,3.246227264404297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,48,4,128,1,float16,float16,0,3.644027328491211
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,48,4,128,1,float16,fp8,0,3.3392913818359373
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,48,4,128,1,fp8,fp8,0,3.3678943634033205
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,48,8,128,1,float16,float16,0,3.9168033599853516
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,48,8,128,1,float16,fp8,0,3.6662654876708984
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,48,1,128,1,float16,float16,0,1.638430404663086
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,48,1,128,1,float16,fp8,0,1.5927103996276855
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,48,1,128,1,fp8,fp8,0,1.6382415771484375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,48,8,128,1,fp8,fp8,0,3.6470081329345705
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,48,48,128,1,float16,float16,0,3.3678958892822264
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,48,48,128,1,float16,fp8,0,3.251617431640625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,48,48,128,1,fp8,fp8,0,3.317041778564453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,48,2,128,1,float16,float16,0,1.699608039855957
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,48,2,128,1,float16,fp8,0,1.6320480346679687
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,48,2,128,1,fp8,fp8,0,1.6290191650390624
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,48,4,128,1,float16,float16,0,1.7712352752685547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,48,4,128,1,float16,fp8,0,1.6867359161376954
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,48,4,128,1,fp8,fp8,0,1.711996841430664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,48,8,128,1,float16,float16,0,1.9069183349609375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,48,8,128,1,float16,fp8,0,1.8523088455200196
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,48,48,128,1,float16,float16,0,1.7089487075805665
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,48,8,128,1,fp8,fp8,0,1.930227279663086
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,48,48,128,1,float16,fp8,0,1.641324806213379
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,48,1,128,1,float16,float16,0,0.8309247970581055
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,48,1,128,1,float16,fp8,0,0.8127087593078614
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,48,48,128,1,fp8,fp8,0,1.6801551818847655
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,48,1,128,1,fp8,fp8,0,0.8363311767578125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,48,2,128,1,float16,float16,0,0.8579232215881347
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,48,2,128,1,fp8,fp8,0,0.825385570526123
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,48,2,128,1,float16,fp8,0,0.8265088081359864
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,48,4,128,1,float16,float16,0,0.9066287994384765
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,48,4,128,1,float16,fp8,0,0.8668288230895996
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,48,4,128,1,fp8,fp8,0,0.8631664276123047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,48,8,128,1,float16,float16,0,0.9655407905578614
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,48,8,128,1,float16,fp8,0,0.9483776092529297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,48,8,128,1,fp8,fp8,0,0.9349967956542968
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,48,48,128,1,float16,float16,0,0.8660063743591309
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,48,48,128,1,float16,fp8,0,0.8474271774291993
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,48,48,128,1,fp8,fp8,0,0.8403887748718262
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,48,1,128,1,float16,float16,0,0.43018078804016113
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,48,1,128,1,float16,fp8,0,0.41885762214660643
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,48,1,128,1,fp8,fp8,0,0.4194064140319824
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,48,2,128,1,float16,float16,0,0.4373151779174805
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,48,2,128,1,float16,fp8,0,0.42834081649780276
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,48,2,128,1,fp8,fp8,0,0.42821760177612306
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,48,4,128,1,float16,float16,0,0.4547408103942871
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,48,4,128,1,float16,fp8,0,0.4452847957611084
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,48,4,128,1,fp8,fp8,0,0.4445024013519287
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,48,8,128,1,float16,float16,0,0.49478721618652344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,48,8,128,1,float16,fp8,0,0.4791855812072754
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,48,8,128,1,fp8,fp8,0,0.48020000457763673
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,48,48,128,1,float16,float16,0,0.4456064224243164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,48,48,128,1,float16,fp8,0,0.4330304145812988
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,48,48,128,1,fp8,fp8,0,0.43284001350402834
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,48,1,128,1,float16,float16,0,0.22479679584503173
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,48,1,128,1,float16,fp8,0,0.22066400051116944
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,48,1,128,1,fp8,fp8,0,0.21976959705352783
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,48,2,128,1,float16,float16,0,0.23148319721221924
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,48,2,128,1,float16,fp8,0,0.2264240026473999
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,48,2,128,1,fp8,fp8,0,0.22648000717163086
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,48,4,128,1,float16,float16,0,0.23978719711303711
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,48,4,128,1,float16,fp8,0,0.2343280076980591
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,48,4,128,1,fp8,fp8,0,0.2344048023223877
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,48,8,128,1,float16,float16,0,0.25864319801330565
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,48,8,128,1,float16,fp8,0,0.25040640830993655
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,48,8,128,1,fp8,fp8,0,0.2528640031814575
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,48,48,128,1,float16,float16,0,0.2353440046310425
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,48,48,128,1,float16,fp8,0,0.2276479959487915
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,48,48,128,1,fp8,fp8,0,0.22753760814666749
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,48,1,128,1,float16,float16,0,0.12444640398025512
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,48,1,128,1,float16,fp8,0,0.118286395072937
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,48,1,128,1,fp8,fp8,0,0.11690560579299927
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,48,2,128,1,float16,float16,0,0.12528799772262572
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,48,8,128,1,float16,float16,0,0.14083199501037597
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,48,8,128,1,float16,fp8,0,0.13700000047683716
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,48,2,128,1,float16,fp8,0,0.11960320472717285
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,48,2,128,1,fp8,fp8,0,0.11849119663238525
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,48,4,128,1,float16,float16,0,0.13188639879226685
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,48,4,128,1,float16,fp8,0,0.12582240104675294
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,48,4,128,1,fp8,fp8,0,0.1258080005645752
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,48,8,128,1,fp8,fp8,0,0.13640639781951905
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,48,1,128,1,float16,float16,0,4.135036849975586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,48,1,128,1,float16,fp8,0,3.880204772949219
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,48,1,128,1,fp8,fp8,0,3.8730304718017576
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,48,2,128,1,float16,float16,0,4.320113754272461
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,48,2,128,1,float16,fp8,0,4.003174209594727
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,48,2,128,1,fp8,fp8,0,3.9934223175048826
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,48,4,128,1,float16,fp8,0,4.184521484375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,48,4,128,1,float16,float16,0,4.540489578247071
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,48,1,128,1,float16,float16,0,2.0566080093383787
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,48,4,128,1,fp8,fp8,0,4.325798416137696
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,48,8,128,1,float16,float16,0,4.934819030761719
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,48,8,128,1,float16,fp8,0,4.632761764526367
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,48,8,128,1,fp8,fp8,0,4.6995086669921875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,48,48,128,1,float16,float16,0,4.552070236206054
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,48,48,128,1,float16,fp8,0,4.472819137573242
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,48,48,128,1,fp8,fp8,0,4.459993743896485
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,48,1,128,1,float16,fp8,0,2.0036720275878905
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,48,1,128,1,fp8,fp8,0,1.9760080337524415
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,48,2,128,1,float16,float16,0,2.1242223739624024
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,48,2,128,1,float16,fp8,0,2.0060848236083983
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,48,2,128,1,fp8,fp8,0,2.02728157043457
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,48,4,128,1,float16,float16,0,2.198958396911621
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,48,4,128,1,float16,fp8,0,2.1137344360351564
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,48,4,128,1,fp8,fp8,0,2.1129632949829102
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,48,8,128,1,float16,float16,0,2.4369152069091795
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,48,8,128,1,float16,fp8,0,2.3581695556640625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,48,1,128,1,float16,float16,0,1.0118592262268067
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,48,48,128,1,float16,float16,0,2.3027936935424806
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,48,8,128,1,fp8,fp8,0,2.413051223754883
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,48,1,128,1,float16,fp8,0,0.9869312286376953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,48,48,128,1,float16,fp8,0,2.2644559860229494
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,48,48,128,1,fp8,fp8,0,2.2914255142211912
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,48,1,128,1,fp8,fp8,0,1.0112367630004884
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,48,2,128,1,float16,float16,0,1.0491583824157715
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,48,2,128,1,float16,fp8,0,1.0170384407043458
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,48,2,128,1,fp8,fp8,0,1.0147407531738282
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,48,4,128,1,float16,float16,0,1.1006367683410645
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,48,4,128,1,float16,fp8,0,1.0642271995544434
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,48,4,128,1,fp8,fp8,0,1.080345630645752
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,48,8,128,1,float16,float16,0,1.2119088172912598
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,48,8,128,1,float16,fp8,0,1.1880895614624023
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,48,8,128,1,fp8,fp8,0,1.1766592025756837
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,48,1,128,1,float16,float16,0,0.5154687881469726
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,48,48,128,1,float16,float16,0,1.1572879791259765
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,48,48,128,1,float16,fp8,0,1.1380096435546876
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,48,1,128,1,float16,fp8,0,0.5117152214050293
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,48,1,128,1,fp8,fp8,0,0.5123504161834717
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,48,48,128,1,fp8,fp8,0,1.145359992980957
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,48,2,128,1,float16,float16,0,0.5317647933959961
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,48,2,128,1,float16,fp8,0,0.5199920177459717
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,48,2,128,1,fp8,fp8,0,0.5203775882720947
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,48,4,128,1,float16,float16,0,0.5570703983306885
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,48,4,128,1,float16,fp8,0,0.5483888149261474
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,48,4,128,1,fp8,fp8,0,0.5526495933532715
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,48,8,128,1,float16,float16,0,0.6166304111480713
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,48,8,128,1,float16,fp8,0,0.6001887798309327
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,48,8,128,1,fp8,fp8,0,0.5996352195739746
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,48,48,128,1,float16,float16,0,0.5913375854492188
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,48,48,128,1,float16,fp8,0,0.5824207782745361
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,48,1,128,1,float16,float16,0,0.27050559520721434
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,48,48,128,1,fp8,fp8,0,0.5830783843994141
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,48,1,128,1,float16,fp8,0,0.266430401802063
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,48,1,128,1,fp8,fp8,0,0.26616320610046384
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,48,2,128,1,float16,float16,0,0.27596321105957033
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,48,2,128,1,float16,fp8,0,0.2725744009017944
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,48,2,128,1,fp8,fp8,0,0.2721839904785156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,48,4,128,1,float16,float16,0,0.2924607992172241
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,48,4,128,1,float16,fp8,0,0.28788158893585203
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,48,4,128,1,fp8,fp8,0,0.2885823965072632
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,48,8,128,1,float16,float16,0,0.32075679302215576
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,48,8,128,1,float16,fp8,0,0.31179039478302
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,48,8,128,1,fp8,fp8,0,0.3133296012878418
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,48,48,128,1,float16,float16,0,0.31029438972473145
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,48,1,128,1,fp8,fp8,0,0.14617600440979003
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,48,48,128,1,float16,fp8,0,0.30711519718170166
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,48,48,128,1,fp8,fp8,0,0.3060111999511719
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,48,1,128,1,float16,float16,0,0.14939359426498414
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,48,1,128,1,float16,fp8,0,0.14566080570220946
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,48,2,128,1,float16,float16,0,0.14919840097427367
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,48,2,128,1,float16,fp8,0,0.14740159511566162
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,48,2,128,1,fp8,fp8,0,0.14794559478759767
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,48,4,128,1,float16,float16,0,0.1569056034088135
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,48,4,128,1,float16,fp8,0,0.15569280385971068
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,48,4,128,1,fp8,fp8,0,0.1556175947189331
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,48,8,128,1,float16,float16,0,0.17148799896240235
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,48,8,128,1,float16,fp8,0,0.16978240013122559
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,48,8,128,1,fp8,fp8,0,0.16827039718627929
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,48,48,128,1,float16,float16,0,0.16692479848861694
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,48,48,128,1,float16,fp8,0,0.16376639604568483
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,48,48,128,1,fp8,fp8,0,0.16395519971847533
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,48,1,128,1,float16,float16,0,0.08175359964370728
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,48,1,128,1,float16,fp8,0,0.07925919890403747
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,48,1,128,1,fp8,fp8,0,0.07927039861679078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,48,2,128,1,float16,float16,0,0.08462079763412475
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,48,2,128,1,float16,fp8,0,0.07956640124320984
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,48,2,128,1,fp8,fp8,0,0.07995679974555969
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,48,4,128,1,float16,float16,0,0.08619840145111084
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,48,4,128,1,float16,fp8,0,0.08251039981842041
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,48,4,128,1,fp8,fp8,0,0.08295680284500122
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,48,8,128,1,float16,float16,0,0.09667680263519288
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,48,8,128,1,float16,fp8,0,0.09062719941139222
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,48,8,128,1,fp8,fp8,0,0.09138240218162537
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,48,1,128,1,float16,float16,0,4.3007568359375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,48,1,128,1,float16,fp8,0,4.0889232635498045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,48,1,128,1,fp8,fp8,0,4.071814346313476
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,48,2,128,1,float16,float16,0,4.408915328979492
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,48,2,128,1,float16,fp8,0,4.237575912475586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,48,2,128,1,fp8,fp8,0,4.236387252807617
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,48,4,128,1,float16,float16,0,4.634936141967773
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,48,4,128,1,float16,fp8,0,4.51812973022461
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,48,1,128,1,float16,float16,0,2.0835376739501954
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,48,4,128,1,fp8,fp8,0,4.557320022583008
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,48,8,128,1,float16,float16,0,5.224721527099609
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,48,8,128,1,float16,fp8,0,5.0919841766357425
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,48,8,128,1,fp8,fp8,0,5.149188613891601
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,48,48,128,1,float16,float16,0,5.465966415405274
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,48,48,128,1,float16,fp8,0,5.410136032104492
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,48,1,128,1,float16,fp8,0,2.0805456161499025
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,48,1,128,1,fp8,fp8,0,2.059297561645508
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,48,48,128,1,fp8,fp8,0,5.471350479125976
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,48,2,128,1,float16,float16,0,2.184561538696289
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,48,2,128,1,float16,fp8,0,2.123721694946289
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,48,2,128,1,fp8,fp8,0,2.143937683105469
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,48,4,128,1,float16,float16,0,2.312881660461426
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,48,4,128,1,float16,fp8,0,2.2741647720336915
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,48,4,128,1,fp8,fp8,0,2.2824527740478517
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,48,8,128,1,float16,float16,0,2.5987071990966797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,48,8,128,1,float16,fp8,0,2.5791759490966797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,48,8,128,1,fp8,fp8,0,2.581755256652832
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,48,1,128,1,float16,float16,0,1.0462384223937988
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,48,1,128,1,float16,fp8,0,1.0481632232666016
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,48,48,128,1,float16,float16,0,2.758083152770996
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,48,1,128,1,fp8,fp8,0,1.0809632301330567
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,48,2,128,1,float16,float16,0,1.087883186340332
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,48,48,128,1,float16,fp8,0,2.7330272674560545
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,48,2,128,1,float16,fp8,0,1.082102394104004
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,48,48,128,1,fp8,fp8,0,2.7431631088256836
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,48,2,128,1,fp8,fp8,0,1.0845423698425294
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,48,4,128,1,float16,fp8,0,1.1427488327026367
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,48,4,128,1,float16,float16,0,1.1615103721618651
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,48,4,128,1,fp8,fp8,0,1.1465680122375488
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,48,8,128,1,float16,float16,0,1.3119215965270996
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,48,8,128,1,float16,fp8,0,1.2983903884887695
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,48,8,128,1,fp8,fp8,0,1.2987168312072754
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,48,1,128,1,float16,float16,0,0.5351615905761719
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,48,48,128,1,float16,float16,0,1.3955663681030273
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,48,1,128,1,float16,fp8,0,0.5310256004333496
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,48,48,128,1,float16,fp8,0,1.3784159660339355
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,48,48,128,1,fp8,fp8,0,1.3697360038757325
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,48,1,128,1,fp8,fp8,0,0.5305744171142578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,48,2,128,1,float16,float16,0,0.5539743900299072
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,48,2,128,1,float16,fp8,0,0.551966381072998
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,48,2,128,1,fp8,fp8,0,0.5550591945648193
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,48,4,128,1,float16,float16,0,0.5879712104797363
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,48,4,128,1,float16,fp8,0,0.5888336181640625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,48,4,128,1,fp8,fp8,0,0.5825376033782959
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,48,8,128,1,float16,float16,0,0.6600560188293457
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,48,8,128,1,float16,fp8,0,0.6612287998199463
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,48,8,128,1,fp8,fp8,0,0.6553055763244628
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,48,48,128,1,float16,float16,0,0.714131212234497
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,48,48,128,1,float16,fp8,0,0.6974448204040528
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,48,1,128,1,float16,float16,0,0.2774064064025879
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,48,48,128,1,fp8,fp8,0,0.7020095825195313
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,48,1,128,1,float16,fp8,0,0.27753920555114747
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,48,1,128,1,fp8,fp8,0,0.281113600730896
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,48,2,128,1,float16,float16,0,0.288155198097229
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,48,2,128,1,float16,fp8,0,0.28790879249572754
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,48,2,128,1,fp8,fp8,0,0.2871167898178101
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,48,4,128,1,float16,float16,0,0.30510079860687256
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,48,4,128,1,float16,fp8,0,0.3063472032546997
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,48,4,128,1,fp8,fp8,0,0.302454400062561
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,48,8,128,1,float16,float16,0,0.34130239486694336
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,48,8,128,1,float16,fp8,0,0.34092960357666013
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,48,8,128,1,fp8,fp8,0,0.34194719791412354
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,48,48,128,1,float16,float16,0,0.36874558925628664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,48,48,128,1,float16,fp8,0,0.36066720485687254
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,48,48,128,1,fp8,fp8,0,0.3632944107055664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,48,1,128,1,float16,float16,0,0.14956480264663696
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,48,1,128,1,float16,fp8,0,0.14926719665527344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,48,1,128,1,fp8,fp8,0,0.149891197681427
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,48,2,128,1,float16,float16,0,0.1575376033782959
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,48,2,128,1,float16,fp8,0,0.15561920404434204
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,48,2,128,1,fp8,fp8,0,0.15548160076141357
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,48,4,128,1,float16,float16,0,0.1659983992576599
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,48,4,128,1,float16,fp8,0,0.16262079477310182
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,48,4,128,1,fp8,fp8,0,0.16375199556350709
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,48,8,128,1,float16,float16,0,0.18303840160369872
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,48,8,128,1,float16,fp8,0,0.18183519840240478
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,48,8,128,1,fp8,fp8,0,0.18318719863891603
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,48,48,128,1,float16,float16,0,0.19597760438919068
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,48,48,128,1,float16,fp8,0,0.19105119705200196
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,48,48,128,1,fp8,fp8,0,0.19222559928894042
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,48,1,128,1,float16,float16,0,0.08649439811706543
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,48,1,128,1,float16,fp8,0,0.08268160223960877
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,48,1,128,1,fp8,fp8,0,0.08206560015678406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,48,2,128,1,float16,float16,0,0.087772798538208
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,48,2,128,1,float16,fp8,0,0.08369920253753663
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,48,2,128,1,fp8,fp8,0,0.08397600054740906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,48,8,128,1,fp8,fp8,0,0.10082720518112183
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,48,4,128,1,float16,float16,0,0.09268479943275451
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,48,4,128,1,float16,fp8,0,0.0902559995651245
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,48,4,128,1,fp8,fp8,0,0.0903760015964508
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,48,8,128,1,float16,float16,0,0.10108319520950318
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,48,8,128,1,float16,fp8,0,0.09933599829673767
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,48,48,128,1,float16,float16,0,0.11129599809646606
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,48,48,128,1,float16,fp8,0,0.10667359828948975
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,48,48,128,1,fp8,fp8,0,0.10645920038223267
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,48,1,128,1,float16,float16,0,0.0491456001996994
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,48,1,128,1,float16,fp8,0,0.04941920042037964
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,48,1,128,1,fp8,fp8,0,0.04945760071277618
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,48,2,128,1,float16,float16,0,0.04975039958953857
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,48,2,128,1,float16,fp8,0,0.04963679909706116
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,48,2,128,1,fp8,fp8,0,0.04981279969215393
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,48,4,128,1,float16,float16,0,0.05137280225753784
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,48,4,128,1,float16,fp8,0,0.05041599869728088
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,48,4,128,1,fp8,fp8,0,0.050475198030471805
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,48,8,128,1,float16,float16,0,0.057073599100112914
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,48,8,128,1,float16,fp8,0,0.05562719702720642
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,48,8,128,1,fp8,fp8,0,0.05552639961242676
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,48,1,128,1,float16,float16,0,2.871536064147949
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,48,1,128,1,float16,fp8,0,2.964182472229004
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,48,1,128,1,fp8,fp8,0,2.9814735412597657
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,48,2,128,1,float16,float16,0,3.026576042175293
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,48,2,128,1,float16,fp8,0,3.106166458129883
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,48,2,128,1,fp8,fp8,0,3.1124048233032227
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,48,4,128,1,float16,float16,0,3.304087829589844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,48,4,128,1,float16,fp8,0,3.3909393310546876
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,48,4,128,1,fp8,fp8,0,3.4232479095458985
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,48,1,128,1,float16,float16,0,1.4517871856689453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,48,8,128,1,float16,float16,0,3.919054412841797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,48,8,128,1,float16,fp8,0,3.992046356201172
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,48,8,128,1,fp8,fp8,0,4.00482063293457
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,48,1,128,1,float16,fp8,0,1.5044544219970704
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,48,1,128,1,fp8,fp8,0,1.4945664405822754
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,48,48,128,1,float16,float16,0,4.877643203735351
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,48,2,128,1,float16,float16,0,1.5235919952392578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,48,48,128,1,float16,fp8,0,4.895588684082031
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,48,2,128,1,float16,fp8,0,1.5670255661010741
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,48,48,128,1,fp8,fp8,0,4.931820678710937
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,48,2,128,1,fp8,fp8,0,1.5774415969848632
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,48,4,128,1,float16,float16,0,1.6824848175048828
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,48,4,128,1,float16,fp8,0,1.706617546081543
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,48,4,128,1,fp8,fp8,0,1.7065568923950196
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,48,8,128,1,float16,float16,0,1.9688352584838866
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,48,8,128,1,float16,fp8,0,2.001801681518555
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,48,8,128,1,fp8,fp8,0,2.0063808441162108
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,48,1,128,1,float16,float16,0,0.7380400180816651
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,48,48,128,1,float16,float16,0,2.4701520919799806
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,48,1,128,1,float16,fp8,0,0.7586815834045411
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,48,48,128,1,float16,fp8,0,2.477128028869629
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,48,1,128,1,fp8,fp8,0,0.7590703964233398
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,48,2,128,1,float16,float16,0,0.7789792060852051
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,48,2,128,1,float16,fp8,0,0.7943520069122314
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,48,48,128,1,fp8,fp8,0,2.4615152359008787
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,48,2,128,1,fp8,fp8,0,0.8005328178405762
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,48,4,128,1,float16,float16,0,0.8485520362854004
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,48,4,128,1,float16,fp8,0,0.8721504211425781
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,48,4,128,1,fp8,fp8,0,0.8643919944763183
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,48,8,128,1,float16,float16,0,0.9995823860168457
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,48,8,128,1,float16,fp8,0,1.0112895965576172
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,48,8,128,1,fp8,fp8,0,1.0146656036376953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,48,1,128,1,float16,float16,0,0.38230080604553224
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,48,48,128,1,float16,float16,0,1.2517791748046876
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,48,1,128,1,float16,fp8,0,0.3923856019973755
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,48,1,128,1,fp8,fp8,0,0.3911007881164551
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,48,48,128,1,float16,fp8,0,1.2434368133544922
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,48,2,128,1,float16,float16,0,0.40157279968261717
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,48,48,128,1,fp8,fp8,0,1.2423104286193847
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,48,2,128,1,float16,fp8,0,0.4108799934387207
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,48,2,128,1,fp8,fp8,0,0.40828962326049806
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,48,4,128,1,float16,float16,0,0.43773441314697265
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,48,8,128,1,float16,float16,0,0.5118224143981933
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,48,4,128,1,float16,fp8,0,0.44483041763305664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,48,4,128,1,fp8,fp8,0,0.44468321800231936
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,48,8,128,1,float16,fp8,0,0.5178192138671875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,48,8,128,1,fp8,fp8,0,0.5183135986328125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,48,1,128,1,float16,float16,0,0.20433599948883058
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,48,48,128,1,float16,float16,0,0.6346015930175781
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,48,1,128,1,float16,fp8,0,0.20938079357147216
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,48,48,128,1,fp8,fp8,0,0.633684778213501
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,48,1,128,1,fp8,fp8,0,0.20918080806732178
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,48,48,128,1,float16,fp8,0,0.6327328205108642
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,48,2,128,1,float16,float16,0,0.21307361125946045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,48,2,128,1,float16,fp8,0,0.21691520214080812
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,48,2,128,1,fp8,fp8,0,0.21698880195617676
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,48,4,128,1,float16,float16,0,0.22900159358978273
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,48,4,128,1,float16,fp8,0,0.23440160751342773
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,48,4,128,1,fp8,fp8,0,0.23507521152496338
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,48,8,128,1,float16,float16,0,0.26807360649108886
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,48,8,128,1,float16,fp8,0,0.27186241149902346
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,48,8,128,1,fp8,fp8,0,0.2715312004089355
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,48,48,128,1,float16,float16,0,0.3299599885940552
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,48,48,128,1,float16,fp8,0,0.3286639928817749
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,48,48,128,1,fp8,fp8,0,0.3281264066696167
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,48,1,128,1,float16,float16,0,0.11173280477523803
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,48,1,128,1,float16,fp8,0,0.11597599983215331
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,48,1,128,1,fp8,fp8,0,0.11577279567718506
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,48,2,128,1,float16,float16,0,0.11871839761734009
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,48,2,128,1,float16,fp8,0,0.12124160528182984
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,48,2,128,1,fp8,fp8,0,0.12203199863433838
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,48,4,128,1,float16,float16,0,0.12684160470962524
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,48,4,128,1,float16,fp8,0,0.12873760461807252
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,48,4,128,1,fp8,fp8,0,0.12887840270996093
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,48,8,128,1,float16,float16,0,0.14615039825439452
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,48,8,128,1,float16,fp8,0,0.14818719625473023
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,48,8,128,1,fp8,fp8,0,0.1476207971572876
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,48,48,128,1,float16,float16,0,0.17696800231933593
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,48,48,128,1,float16,fp8,0,0.17599680423736572
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,48,48,128,1,fp8,fp8,0,0.17556639909744262
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,48,1,128,1,float16,float16,0,0.06890239715576171
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,48,1,128,1,float16,fp8,0,0.06625120043754577
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,48,1,128,1,fp8,fp8,0,0.06668319702148437
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,48,2,128,1,float16,float16,0,0.06951040029525757
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,48,2,128,1,float16,fp8,0,0.06813600063323974
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,48,2,128,1,fp8,fp8,0,0.06863840222358704
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,48,4,128,1,float16,float16,0,0.07566400170326233
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,48,4,128,1,float16,fp8,0,0.0750656008720398
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,48,4,128,1,fp8,fp8,0,0.0747439980506897
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,48,8,128,1,float16,float16,0,0.08330240249633789
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,48,8,128,1,float16,fp8,0,0.08457599878311158
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,48,8,128,1,fp8,fp8,0,0.0845583975315094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,48,48,128,1,float16,float16,0,0.09941759705543518
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,48,48,128,1,float16,fp8,0,0.09607359766960144
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,48,48,128,1,fp8,fp8,0,0.0964128017425537
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,48,1,128,1,float16,float16,0,0.04184800088405609
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,48,1,128,1,float16,fp8,0,0.04271839857101441
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,48,1,128,1,fp8,fp8,0,0.0421968013048172
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,48,2,128,1,float16,float16,0,0.042228800058364865
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,48,2,128,1,float16,fp8,0,0.04300479888916016
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,48,2,128,1,fp8,fp8,0,0.04256959855556488
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,48,4,128,1,float16,float16,0,0.043300798535346983
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,48,4,128,1,float16,fp8,0,0.0438400000333786
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,48,4,128,1,fp8,fp8,0,0.043572801351547244
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,48,8,128,1,float16,float16,0,0.04848639965057373
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,48,8,128,1,float16,fp8,0,0.04839999973773956
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,48,8,128,1,fp8,fp8,0,0.04896000027656555
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,48,48,128,1,float16,float16,0,0.055776000022888184
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,48,48,128,1,float16,fp8,0,0.0546239972114563
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,48,48,128,1,fp8,fp8,0,0.05470560193061828
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,48,1,128,1,float16,float16,0,0.03077920079231262
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,48,1,128,1,float16,fp8,0,0.03046880066394806
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,48,1,128,1,fp8,fp8,0,0.030721598863601686
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,48,2,128,1,float16,float16,0,0.03052319884300232
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,48,2,128,1,float16,fp8,0,0.030955201387405394
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,48,2,128,1,fp8,fp8,0,0.030704000592231752
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,48,4,128,1,float16,float16,0,0.030801600217819212
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,48,4,128,1,float16,fp8,0,0.030947199463844298
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,48,4,128,1,fp8,fp8,0,0.030983999371528625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,48,8,128,1,float16,float16,0,0.032046398520469664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,48,8,128,1,float16,fp8,0,0.03341279923915863
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,48,8,128,1,fp8,fp8,0,0.03358719944953918
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,48,1,128,1,float16,float16,0,1.1573455810546875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,48,1,128,1,float16,fp8,0,1.227513599395752
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,48,1,128,1,fp8,fp8,0,1.2277728080749513
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,48,2,128,1,float16,float16,0,1.2308079719543457
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,48,2,128,1,float16,fp8,0,1.2969327926635743
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,48,2,128,1,fp8,fp8,0,1.2980336189270019
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,48,4,128,1,float16,float16,0,1.3840767860412597
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,48,4,128,1,float16,fp8,0,1.4441231727600097
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,48,4,128,1,fp8,fp8,0,1.440976047515869
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,48,8,128,1,float16,float16,0,1.6750143051147461
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,48,8,128,1,float16,fp8,0,1.7388959884643556
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,48,8,128,1,fp8,fp8,0,1.7351808547973633
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,48,1,128,1,float16,float16,0,0.59060640335083
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,48,1,128,1,float16,fp8,0,0.6275040149688721
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,48,1,128,1,fp8,fp8,0,0.6288527965545654
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,48,48,128,1,float16,float16,0,2.322256088256836
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,48,2,128,1,float16,float16,0,0.628220796585083
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,48,48,128,1,float16,fp8,0,2.2947248458862304
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,48,2,128,1,float16,fp8,0,0.6632495880126953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,48,48,128,1,fp8,fp8,0,2.293065643310547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,48,4,128,1,float16,float16,0,0.702782392501831
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,48,2,128,1,fp8,fp8,0,0.6614943981170655
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,48,4,128,1,float16,fp8,0,0.7308703899383545
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,48,4,128,1,fp8,fp8,0,0.7307216167449951
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,48,8,128,1,float16,float16,0,0.8487903594970703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,48,8,128,1,float16,fp8,0,0.8838768005371094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,48,8,128,1,fp8,fp8,0,0.8778688430786132
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,48,1,128,1,float16,float16,0,0.3083456039428711
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,48,48,128,1,float16,float16,0,1.1791888236999513
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,48,1,128,1,float16,fp8,0,0.32588160037994385
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,48,48,128,1,float16,fp8,0,1.1579872131347657
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,48,1,128,1,fp8,fp8,0,0.3248127937316895
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,48,48,128,1,fp8,fp8,0,1.1580320358276368
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,48,2,128,1,float16,float16,0,0.3291856050491333
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,48,4,128,1,fp8,fp8,0,0.37884159088134767
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,48,2,128,1,float16,fp8,0,0.34419679641723633
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,48,2,128,1,fp8,fp8,0,0.34395360946655273
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,48,4,128,1,float16,float16,0,0.36307361125946047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,48,4,128,1,float16,fp8,0,0.37803359031677247
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,48,8,128,1,float16,float16,0,0.43559041023254397
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,48,8,128,1,float16,fp8,0,0.4508016109466553
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,48,8,128,1,fp8,fp8,0,0.4519951820373535
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,48,48,128,1,float16,float16,0,0.6021999835968017
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,48,48,128,1,float16,fp8,0,0.591535997390747
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,48,1,128,1,float16,float16,0,0.16695040464401245
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,48,48,128,1,fp8,fp8,0,0.5910223960876465
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,48,1,128,1,float16,fp8,0,0.1753600001335144
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,48,1,128,1,fp8,fp8,0,0.174891197681427
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,48,2,128,1,float16,float16,0,0.1749776005744934
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,48,2,128,1,float16,fp8,0,0.18472800254821778
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,48,2,128,1,fp8,fp8,0,0.18410559892654418
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,48,4,128,1,float16,float16,0,0.1936992049217224
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,48,4,128,1,float16,fp8,0,0.20157759189605712
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,48,4,128,1,fp8,fp8,0,0.20123679637908937
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,48,8,128,1,float16,float16,0,0.22946720123291015
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,48,8,128,1,float16,fp8,0,0.23640480041503906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,48,8,128,1,fp8,fp8,0,0.23670239448547364
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,48,48,128,1,float16,float16,0,0.31320478916168215
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,48,48,128,1,float16,fp8,0,0.30724639892578126
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,48,1,128,1,float16,float16,0,0.0942304015159607
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,48,48,128,1,fp8,fp8,0,0.3070784091949463
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,48,1,128,1,float16,fp8,0,0.09828000068664551
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,48,1,128,1,fp8,fp8,0,0.09795519709587097
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,48,2,128,1,float16,float16,0,0.10030720233917237
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,48,2,128,1,float16,fp8,0,0.10404319763183593
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,48,2,128,1,fp8,fp8,0,0.10476000308990478
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,48,4,128,1,float16,float16,0,0.1074671983718872
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,48,4,128,1,float16,fp8,0,0.11192799806594848
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,48,4,128,1,fp8,fp8,0,0.11123360395431518
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,48,8,128,1,float16,float16,0,0.12635200023651122
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,48,8,128,1,float16,fp8,0,0.12986559867858888
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,48,8,128,1,fp8,fp8,0,0.1303328037261963
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,48,48,128,1,float16,float16,0,0.16878080368041992
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,48,48,128,1,float16,fp8,0,0.16544159650802612
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,48,2,128,1,float16,fp8,0,0.05917440056800842
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,48,48,128,1,fp8,fp8,0,0.1657055974006653
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,48,1,128,1,float16,float16,0,0.0590287983417511
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,48,1,128,1,float16,fp8,0,0.05809440016746521
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,48,1,128,1,fp8,fp8,0,0.058457601070404056
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,48,2,128,1,float16,float16,0,0.059811198711395265
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,48,2,128,1,fp8,fp8,0,0.05877439975738526
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,48,4,128,1,float16,float16,0,0.06647359728813171
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,48,4,128,1,float16,fp8,0,0.06582880020141602
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,48,4,128,1,fp8,fp8,0,0.06578559875488281
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,48,8,128,1,float16,float16,0,0.07376959919929504
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,48,8,128,1,float16,fp8,0,0.075135999917984
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,48,8,128,1,fp8,fp8,0,0.07516319751739502
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,48,48,128,1,float16,float16,0,0.09455519914627075
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,48,48,128,1,float16,fp8,0,0.09129920005798339
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,48,48,128,1,fp8,fp8,0,0.09114720225334168
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,48,1,128,1,float16,float16,0,0.035812801122665404
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,48,1,128,1,float16,fp8,0,0.03775359988212586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,48,1,128,1,fp8,fp8,0,0.037771201133728026
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,48,2,128,1,float16,float16,0,0.03550559878349304
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,48,2,128,1,float16,fp8,0,0.03759840130805969
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,48,2,128,1,fp8,fp8,0,0.03754720091819763
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,48,4,128,1,float16,float16,0,0.037190398573875426
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,48,4,128,1,float16,fp8,0,0.03914400041103363
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,48,4,128,1,fp8,fp8,0,0.038863998651504514
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,48,8,128,1,float16,float16,0,0.042766401171684267
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,48,8,128,1,float16,fp8,0,0.0435344010591507
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,48,8,128,1,fp8,fp8,0,0.043654400110244754
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,48,48,128,1,float16,float16,0,0.052393597364425656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,48,48,128,1,float16,fp8,0,0.05188639760017395
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,48,48,128,1,fp8,fp8,0,0.05118079781532288
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,48,1,128,1,float16,float16,0,0.029070401191711427
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,48,1,128,1,float16,fp8,0,0.028638398647308348
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,48,1,128,1,fp8,fp8,0,0.0287663996219635
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,48,2,128,1,float16,float16,0,0.028753599524497984
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,48,2,128,1,float16,fp8,0,0.028483200073242187
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,48,2,128,1,fp8,fp8,0,0.02873919904232025
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,48,4,128,1,float16,float16,0,0.029183998703956604
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,48,4,128,1,float16,fp8,0,0.029340800642967225
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,48,4,128,1,fp8,fp8,0,0.029180800914764403
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,48,8,128,1,float16,float16,0,0.03003840148448944
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,48,8,128,1,float16,fp8,0,0.0313647985458374
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,48,8,128,1,fp8,fp8,0,0.03125759959220886
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,48,48,128,1,float16,float16,0,0.03292160034179688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,48,48,128,1,float16,fp8,0,0.03394719958305359
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,48,48,128,1,fp8,fp8,0,0.03400959968566895
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,48,1,128,1,float16,float16,0,0.023262399435043334
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,48,1,128,1,float16,fp8,0,0.02399040013551712
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,48,1,128,1,fp8,fp8,0,0.023921599984169005
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,48,2,128,1,float16,float16,0,0.023001599311828613
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,48,2,128,1,float16,fp8,0,0.024299199879169463
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,48,2,128,1,fp8,fp8,0,0.024281600117683412
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,48,4,128,1,float16,float16,0,0.023684799671173096
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,48,4,128,1,float16,fp8,0,0.02407519966363907
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,48,4,128,1,fp8,fp8,0,0.024320000410079957
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,48,8,128,1,float16,float16,0,0.02401279956102371
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,48,8,128,1,float16,fp8,0,0.024558399617671967
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,48,8,128,1,fp8,fp8,0,0.024115200340747833
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,48,1,128,1,float16,float16,0,0.551854419708252
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,48,1,128,1,float16,fp8,0,0.5959792137145996
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,48,1,128,1,fp8,fp8,0,0.5962224006652832
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,48,4,128,1,float16,float16,0,0.6596960067749024
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,48,2,128,1,float16,float16,0,0.5881440162658691
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,48,2,128,1,float16,fp8,0,0.6310351848602295
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,48,2,128,1,fp8,fp8,0,0.6327631950378418
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,48,4,128,1,float16,fp8,0,0.7066864013671875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,48,4,128,1,fp8,fp8,0,0.7056079864501953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,48,8,128,1,float16,float16,0,0.8043120384216309
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,48,8,128,1,float16,fp8,0,0.8529248237609863
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,48,8,128,1,fp8,fp8,0,0.8511136054992676
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,48,48,128,1,float16,float16,0,1.1477775573730469
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,48,1,128,1,float16,float16,0,0.287062406539917
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,48,1,128,1,float16,fp8,0,0.3109071969985962
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,48,48,128,1,float16,fp8,0,1.1577967643737792
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,48,1,128,1,fp8,fp8,0,0.3117248058319092
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,48,48,128,1,fp8,fp8,0,1.1560943603515625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,48,2,128,1,float16,float16,0,0.30788478851318357
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,48,2,128,1,float16,fp8,0,0.3288912057876587
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,48,2,128,1,fp8,fp8,0,0.33112320899963377
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,48,4,128,1,float16,float16,0,0.3379312038421631
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,48,4,128,1,float16,fp8,0,0.3645008087158203
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,48,4,128,1,fp8,fp8,0,0.3638175964355469
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,48,8,128,1,float16,float16,0,0.415118408203125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,48,8,128,1,float16,fp8,0,0.43828158378601073
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,48,1,128,1,float16,fp8,0,0.16779040098190307
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,48,8,128,1,fp8,fp8,0,0.4389488220214844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,48,48,128,1,float16,float16,0,0.5876031875610351
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,48,48,128,1,float16,fp8,0,0.5911248207092286
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,48,1,128,1,float16,float16,0,0.15544480085372925
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,48,48,128,1,fp8,fp8,0,0.5906367778778077
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,48,1,128,1,fp8,fp8,0,0.1699504017829895
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,48,2,128,1,float16,float16,0,0.16335519552230834
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,48,2,128,1,float16,fp8,0,0.17497919797897338
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,48,2,128,1,fp8,fp8,0,0.1763584017753601
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,48,8,128,1,fp8,fp8,0,0.2315295934677124
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,48,4,128,1,float16,float16,0,0.1832703948020935
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,48,4,128,1,float16,fp8,0,0.1963152050971985
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,48,4,128,1,fp8,fp8,0,0.19555360078811646
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,48,8,128,1,float16,float16,0,0.21766080856323242
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,48,8,128,1,float16,fp8,0,0.23214399814605713
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,48,48,128,1,float16,float16,0,0.3065743923187256
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,48,48,128,1,float16,fp8,0,0.3051664113998413
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,48,48,128,1,fp8,fp8,0,0.30425760746002195
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,48,1,128,1,float16,float16,0,0.08943359851837158
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,48,1,128,1,float16,fp8,0,0.09454879760742188
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,48,1,128,1,fp8,fp8,0,0.09417920112609864
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,48,2,128,1,float16,float16,0,0.09566079974174499
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,48,2,128,1,float16,fp8,0,0.10028320550918579
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,48,2,128,1,fp8,fp8,0,0.10045280456542968
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,48,4,128,1,float16,float16,0,0.10387040376663208
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,48,4,128,1,float16,fp8,0,0.10748800039291381
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,48,4,128,1,fp8,fp8,0,0.10783840417861938
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,48,8,128,1,float16,float16,0,0.12220159769058228
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,48,8,128,1,float16,fp8,0,0.12590399980545045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,48,8,128,1,fp8,fp8,0,0.12561919689178466
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,48,48,128,1,float16,float16,0,0.16544320583343505
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,48,48,128,1,float16,fp8,0,0.161188805103302
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,48,48,128,1,fp8,fp8,0,0.16154719591140748
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,48,1,128,1,float16,float16,0,0.05634719729423523
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,48,1,128,1,float16,fp8,0,0.05310720205307007
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,48,1,128,1,fp8,fp8,0,0.05384640097618103
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,48,2,128,1,float16,float16,0,0.057055997848510745
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,48,2,128,1,float16,fp8,0,0.05497599840164184
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,48,2,128,1,fp8,fp8,0,0.0543615996837616
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,48,4,128,1,float16,float16,0,0.06307839751243591
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,48,4,128,1,float16,fp8,0,0.06072480082511902
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,48,4,128,1,fp8,fp8,0,0.06123520135879516
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,48,8,128,1,float16,float16,0,0.0711247980594635
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,48,8,128,1,float16,fp8,0,0.07013279795646668
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,48,8,128,1,fp8,fp8,0,0.07052479982376099
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,48,48,128,1,float16,float16,0,0.09291520118713378
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,48,48,128,1,float16,fp8,0,0.08516960144042969
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,48,48,128,1,fp8,fp8,0,0.0849295973777771
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,48,1,128,1,float16,float16,0,0.032441601157188416
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,48,1,128,1,float16,fp8,0,0.03409120142459869
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,48,1,128,1,fp8,fp8,0,0.034092798829078674
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,48,2,128,1,float16,float16,0,0.03285279870033264
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,48,2,128,1,float16,fp8,0,0.03442400097846985
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,48,8,128,1,float16,float16,0,0.039689600467681885
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,48,2,128,1,fp8,fp8,0,0.03452959954738617
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,48,4,128,1,float16,float16,0,0.03378719985485077
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,48,4,128,1,float16,fp8,0,0.035195198655128476
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,48,4,128,1,fp8,fp8,0,0.03546879887580871
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,48,8,128,1,float16,fp8,0,0.03967519998550415
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,48,8,128,1,fp8,fp8,0,0.040067198872566226
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,48,48,128,1,float16,float16,0,0.05104960203170776
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,48,48,128,1,float16,fp8,0,0.04907839894294739
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,48,48,128,1,fp8,fp8,0,0.04855999946594238
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,48,1,128,1,float16,float16,0,0.027470400929450987
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,48,1,128,1,float16,fp8,0,0.028353598713874818
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,48,1,128,1,fp8,fp8,0,0.02829119861125946
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,48,2,128,1,float16,float16,0,0.027684798836708067
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,48,2,128,1,float16,fp8,0,0.02863520085811615
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,48,2,128,1,fp8,fp8,0,0.02852480113506317
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,48,4,128,1,float16,float16,0,0.027876800298690795
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,48,4,128,1,float16,fp8,0,0.02871040105819702
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,48,4,128,1,fp8,fp8,0,0.028921601176261903
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,48,8,128,1,float16,float16,0,0.02848159968852997
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,48,8,128,1,float16,fp8,0,0.029582399129867553
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,48,8,128,1,fp8,fp8,0,0.02974559962749481
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,48,48,128,1,float16,float16,0,0.031436800956726074
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,48,48,128,1,float16,fp8,0,0.03181760013103485
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,48,48,128,1,fp8,fp8,0,0.03187040090560913
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,48,1,128,1,float16,float16,0,0.021371200680732727
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,48,1,128,1,float16,fp8,0,0.02210720032453537
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,48,1,128,1,fp8,fp8,0,0.022230400145053862
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,48,2,128,1,float16,float16,0,0.021495999395847322
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,48,2,128,1,float16,fp8,0,0.022468799352645875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,48,2,128,1,fp8,fp8,0,0.022368000447750093
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,48,4,128,1,float16,float16,0,0.021993599832057953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,48,4,128,1,float16,fp8,0,0.022407999634742735
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,48,4,128,1,fp8,fp8,0,0.022303999960422517
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,48,8,128,1,float16,float16,0,0.022144000232219695
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,48,8,128,1,float16,fp8,0,0.02306240051984787
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,48,8,128,1,fp8,fp8,0,0.02269120067358017
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,48,48,128,1,float16,float16,0,0.024447999894618988
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,48,48,128,1,float16,fp8,0,0.025047999620437623
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,48,48,128,1,fp8,fp8,0,0.024673600494861603
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,48,1,128,1,float16,float16,0,0.020584000647068022
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,48,1,128,1,float16,fp8,0,0.02104319930076599
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,48,1,128,1,fp8,fp8,0,0.021316799521446227
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,48,2,128,1,float16,float16,0,0.020796799659729005
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,48,2,128,1,float16,fp8,0,0.021401600539684297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,48,2,128,1,fp8,fp8,0,0.021430400013923646
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,48,4,128,1,float16,float16,0,0.02083040028810501
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,48,4,128,1,float16,fp8,0,0.02159679979085922
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,48,4,128,1,fp8,fp8,0,0.021686400473117828
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,48,8,128,1,float16,float16,0,0.02099359929561615
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,48,8,128,1,float16,fp8,0,0.021328000724315642
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,48,8,128,1,fp8,fp8,0,0.02171359956264496
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,48,1,128,1,float16,float16,0,0.2849231958389282
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,48,1,128,1,float16,fp8,0,0.3097872018814087
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,48,1,128,1,fp8,fp8,0,0.3102288007736206
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,48,2,128,1,float16,float16,0,0.3069024085998535
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,48,2,128,1,float16,fp8,0,0.3288016080856323
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,48,2,128,1,fp8,fp8,0,0.3295376062393188
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,48,4,128,1,float16,float16,0,0.3403471946716309
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,48,4,128,1,float16,fp8,0,0.36699841022491453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,48,4,128,1,fp8,fp8,0,0.3652031898498535
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,48,8,128,1,float16,float16,0,0.41080322265625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,48,8,128,1,float16,fp8,0,0.43616957664489747
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,48,8,128,1,fp8,fp8,0,0.43659200668334963
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,48,48,128,1,float16,float16,0,0.6751887798309326
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,48,1,128,1,float16,float16,0,0.15714720487594605
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,48,48,128,1,float16,fp8,0,0.6907311916351319
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,48,1,128,1,float16,fp8,0,0.16893919706344604
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,48,48,128,1,fp8,fp8,0,0.6905663967132568
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,48,1,128,1,fp8,fp8,0,0.1687551975250244
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,48,2,128,1,float16,float16,0,0.16387840509414672
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,48,8,128,1,float16,float16,0,0.21559200286865235
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,48,2,128,1,float16,fp8,0,0.17643519639968872
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,48,2,128,1,fp8,fp8,0,0.17791999578475953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,48,4,128,1,float16,float16,0,0.18304159641265869
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,48,4,128,1,float16,fp8,0,0.19616320133209228
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,48,4,128,1,fp8,fp8,0,0.19490879774093628
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,48,8,128,1,float16,fp8,0,0.23111519813537598
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,48,8,128,1,fp8,fp8,0,0.22856318950653076
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,48,48,128,1,float16,float16,0,0.3499631881713867
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,48,48,128,1,float16,fp8,0,0.357425594329834
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,48,1,128,1,float16,float16,0,0.08923199772834778
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,48,48,128,1,fp8,fp8,0,0.3588047981262207
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,48,1,128,1,float16,fp8,0,0.09470400214195251
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,48,1,128,1,fp8,fp8,0,0.09443039894104004
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,48,2,128,1,float16,float16,0,0.09517120122909546
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,48,2,128,1,float16,fp8,0,0.10033440589904785
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,48,2,128,1,fp8,fp8,0,0.10060479640960693
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,48,4,128,1,float16,float16,0,0.1036944031715393
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,48,4,128,1,float16,fp8,0,0.10841599702835084
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,48,4,128,1,fp8,fp8,0,0.1082927942276001
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,48,8,128,1,float16,float16,0,0.12176799774169922
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,48,8,128,1,float16,fp8,0,0.12631200551986693
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,48,8,128,1,fp8,fp8,0,0.12677119970321654
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,48,48,128,1,float16,float16,0,0.18819839954376222
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,48,48,128,1,float16,fp8,0,0.18843679428100585
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,48,48,128,1,fp8,fp8,0,0.18781759738922119
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,48,1,128,1,float16,float16,0,0.0564079999923706
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,48,1,128,1,float16,fp8,0,0.053390401601791385
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,48,1,128,1,fp8,fp8,0,0.053595197200775144
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,48,2,128,1,float16,float16,0,0.05723680257797241
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,48,2,128,1,float16,fp8,0,0.05435680150985718
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,48,8,128,1,float16,fp8,0,0.07084959745407104
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,48,2,128,1,fp8,fp8,0,0.05523359775543213
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,48,4,128,1,float16,float16,0,0.0632752001285553
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,48,4,128,1,float16,fp8,0,0.06114400029182434
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,48,4,128,1,fp8,fp8,0,0.06115520000457764
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,48,8,128,1,float16,float16,0,0.07147359848022461
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,48,8,128,1,fp8,fp8,0,0.07106400132179261
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,48,48,128,1,float16,float16,0,0.10431360006332398
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,48,48,128,1,float16,fp8,0,0.09815840125083923
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,48,48,128,1,fp8,fp8,0,0.09883360266685486
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,48,1,128,1,float16,float16,0,0.032201600074768064
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,48,1,128,1,float16,fp8,0,0.033851200342178346
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,48,1,128,1,fp8,fp8,0,0.03382239937782287
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,48,2,128,1,float16,float16,0,0.03265759944915771
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,48,2,128,1,float16,fp8,0,0.034143999218940735
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,48,2,128,1,fp8,fp8,0,0.034334400296211244
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,48,4,128,1,float16,float16,0,0.03333759903907776
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,48,4,128,1,float16,fp8,0,0.035078400373458864
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,48,4,128,1,fp8,fp8,0,0.03523040115833283
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,48,8,128,1,float16,float16,0,0.0390720009803772
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,48,8,128,1,float16,fp8,0,0.03959360122680664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,48,8,128,1,fp8,fp8,0,0.03989279866218567
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,48,48,128,1,float16,float16,0,0.05450080037117004
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,48,48,128,1,float16,fp8,0,0.05321120023727417
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,48,48,128,1,fp8,fp8,0,0.0530784010887146
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,48,1,128,1,float16,float16,0,0.027238398790359497
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,48,1,128,1,float16,fp8,0,0.028492799401283263
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,48,1,128,1,fp8,fp8,0,0.028563201427459717
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,48,2,128,1,float16,float16,0,0.027158400416374205
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,48,2,128,1,float16,fp8,0,0.028667199611663818
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,48,2,128,1,fp8,fp8,0,0.028523200750350954
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,48,4,128,1,float16,float16,0,0.027539199590682982
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,48,4,128,1,float16,fp8,0,0.028697600960731505
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,48,4,128,1,fp8,fp8,0,0.028839999437332155
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,48,8,128,1,float16,float16,0,0.028401601314544677
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,48,8,128,1,float16,fp8,0,0.029542401432991028
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,48,8,128,1,fp8,fp8,0,0.029441601037979125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,48,48,128,1,float16,float16,0,0.034720000624656674
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,48,48,128,1,float16,fp8,0,0.035811200737953186
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,48,48,128,1,fp8,fp8,0,0.0361407995223999
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,48,1,128,1,float16,float16,0,0.02152319997549057
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,48,1,128,1,float16,fp8,0,0.022596800327301027
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,48,1,128,1,fp8,fp8,0,0.02256480008363724
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,48,2,128,1,float16,float16,0,0.02143840044736862
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,48,2,128,1,float16,fp8,0,0.022023999691009523
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,48,2,128,1,fp8,fp8,0,0.022657600045204163
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,48,4,128,1,float16,float16,0,0.021775999665260316
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,48,4,128,1,float16,fp8,0,0.022307200729846953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,48,4,128,1,fp8,fp8,0,0.022339199483394623
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,48,8,128,1,float16,float16,0,0.021857599914073943
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,48,8,128,1,float16,fp8,0,0.022780799865722658
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,48,8,128,1,fp8,fp8,0,0.02260479927062988
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,48,48,128,1,float16,float16,0,0.023892800509929656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,48,48,128,1,float16,fp8,0,0.024771200120449068
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,48,48,128,1,fp8,fp8,0,0.02431679964065552
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,48,1,128,1,float16,float16,0,0.020529599487781526
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,48,1,128,1,float16,fp8,0,0.021356800198554994
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,48,1,128,1,fp8,fp8,0,0.02110079973936081
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,48,4,128,1,fp8,fp8,0,0.02172800004482269
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,48,2,128,1,float16,float16,0,0.020584000647068022
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,48,2,128,1,float16,fp8,0,0.021275199949741364
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,48,2,128,1,fp8,fp8,0,0.021348799765110015
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,48,4,128,1,float16,float16,0,0.020843200385570526
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,48,4,128,1,float16,fp8,0,0.021432000398635864
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,48,8,128,1,float16,float16,0,0.020895999670028687
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,48,8,128,1,float16,fp8,0,0.021454399824142455
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,48,8,128,1,fp8,fp8,0,0.021505600214004515
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,48,2,128,1,float16,float16,0,0.01990559995174408
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,48,48,128,1,float16,float16,0,0.021096000075340272
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,48,48,128,1,float16,fp8,0,0.021905599534511565
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,48,48,128,1,fp8,fp8,0,0.02210880070924759
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,48,1,128,1,float16,float16,0,0.019860799610614776
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,48,1,128,1,float16,fp8,0,0.020612800121307374
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,48,1,128,1,fp8,fp8,0,0.02084160000085831
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,48,2,128,1,float16,fp8,0,0.020777599513530733
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,48,2,128,1,fp8,fp8,0,0.02091040015220642
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,48,4,128,1,float16,float16,0,0.02034880071878433
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,48,4,128,1,float16,fp8,0,0.02081120014190674
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,48,4,128,1,fp8,fp8,0,0.021033599972724915
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,48,8,128,1,float16,float16,0,0.020468799769878386
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,48,8,128,1,float16,fp8,0,0.02112800031900406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,48,8,128,1,fp8,fp8,0,0.021356800198554994
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,48,1,128,1,float16,float16,0,0.15751839876174928
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,48,1,128,1,float16,fp8,0,0.17064000368118287
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,48,1,128,1,fp8,fp8,0,0.17095359563827514
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,48,2,128,1,float16,float16,0,0.16378560066223144
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,48,2,128,1,float16,fp8,0,0.178164803981781
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,48,2,128,1,fp8,fp8,0,0.17766079902648926
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,48,4,128,1,float16,float16,0,0.18309279680252075
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,48,4,128,1,float16,fp8,0,0.19747040271759034
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,48,4,128,1,fp8,fp8,0,0.1966320037841797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,48,8,128,1,float16,float16,0,0.24477760791778563
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,48,8,128,1,float16,fp8,0,0.26250400543212893
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,48,8,128,1,fp8,fp8,0,0.2606735944747925
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,48,48,128,1,float16,float16,0,0.4413871765136719
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,48,48,128,1,float16,fp8,0,0.46560959815979003
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,48,1,128,1,float16,float16,0,0.08911359906196595
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,48,48,128,1,fp8,fp8,0,0.4643807888031006
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,48,1,128,1,float16,fp8,0,0.09584320187568665
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,48,1,128,1,fp8,fp8,0,0.09507840275764465
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,48,2,128,1,float16,float16,0,0.09452959895133972
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,48,2,128,1,float16,fp8,0,0.10159200429916382
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,48,2,128,1,fp8,fp8,0,0.10149439573287963
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,48,4,128,1,float16,float16,0,0.1033247947692871
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,48,4,128,1,float16,fp8,0,0.10938080549240112
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,48,4,128,1,fp8,fp8,0,0.10936319828033447
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,48,8,128,1,float16,float16,0,0.135534405708313
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,48,8,128,1,float16,fp8,0,0.1436895966529846
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,48,8,128,1,fp8,fp8,0,0.14310879707336427
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,48,48,128,1,float16,float16,0,0.23350880146026612
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,48,48,128,1,float16,fp8,0,0.2439903974533081
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,48,48,128,1,fp8,fp8,0,0.24323201179504395
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,48,1,128,1,float16,float16,0,0.05647839903831482
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,48,1,128,1,float16,fp8,0,0.05467680096626282
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,48,1,128,1,fp8,fp8,0,0.05418239831924439
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,48,2,128,1,float16,float16,0,0.05706080198287964
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,48,2,128,1,float16,fp8,0,0.05422080159187317
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,48,2,128,1,fp8,fp8,0,0.055383998155593875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,48,4,128,1,float16,float16,0,0.06418079733848572
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,48,4,128,1,float16,fp8,0,0.0611952006816864
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,48,4,128,1,fp8,fp8,0,0.06179839968681335
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,48,8,128,1,float16,float16,0,0.07930399775505066
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,48,8,128,1,float16,fp8,0,0.07941439747810364
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,48,8,128,1,fp8,fp8,0,0.08017920255661011
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,48,48,128,1,float16,float16,0,0.1266271948814392
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,48,48,128,1,float16,fp8,0,0.12810879945755005
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,48,48,128,1,fp8,fp8,0,0.12817920446395875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,48,1,128,1,float16,float16,0,0.03280160129070282
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,48,1,128,1,float16,fp8,0,0.034324800968170165
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,48,1,128,1,fp8,fp8,0,0.03424000144004822
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,48,2,128,1,float16,float16,0,0.03258079886436462
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,48,2,128,1,float16,fp8,0,0.03472479879856109
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,48,2,128,1,fp8,fp8,0,0.034414398670196536
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,48,4,128,1,float16,float16,0,0.033843201398849485
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,48,4,128,1,float16,fp8,0,0.03552959859371185
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,48,4,128,1,fp8,fp8,0,0.03566559851169586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,48,8,128,1,float16,float16,0,0.04255520105361939
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,48,8,128,1,float16,fp8,0,0.04456000030040741
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,48,8,128,1,fp8,fp8,0,0.04441919922828674
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,48,48,128,1,float16,float16,0,0.06639360189437866
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,48,48,128,1,float16,fp8,0,0.06682400107383728
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,48,48,128,1,fp8,fp8,0,0.06669920086860656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,48,1,128,1,float16,float16,0,0.027036800980567932
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,48,1,128,1,float16,fp8,0,0.02861759960651398
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,48,1,128,1,fp8,fp8,0,0.028721600770950317
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,48,2,128,1,float16,float16,0,0.027169600129127502
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,48,2,128,1,float16,fp8,0,0.02820959985256195
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,48,2,128,1,fp8,fp8,0,0.028601598739624024
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,48,4,128,1,float16,float16,0,0.02727999985218048
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,48,4,128,1,float16,fp8,0,0.02868799865245819
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,48,4,128,1,fp8,fp8,0,0.02879520058631897
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,48,8,128,1,float16,float16,0,0.02815999984741211
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,48,8,128,1,float16,fp8,0,0.029811200499534608
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,48,8,128,1,fp8,fp8,0,0.029582399129867553
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,48,48,128,1,float16,float16,0,0.038252800703048706
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,48,48,128,1,float16,fp8,0,0.040766400098800656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,48,48,128,1,fp8,fp8,0,0.04052639901638031
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,48,1,128,1,float16,float16,0,0.021456000208854676
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,48,1,128,1,float16,fp8,0,0.022393600642681123
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,48,1,128,1,fp8,fp8,0,0.022299200296401978
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,48,2,128,1,float16,float16,0,0.02175839990377426
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,48,2,128,1,fp8,fp8,0,0.02239840030670166
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,48,4,128,1,float16,fp8,0,0.022310400009155275
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,48,4,128,1,float16,float16,0,0.021694399416446686
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,48,4,128,1,fp8,fp8,0,0.022750400006771088
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,48,2,128,1,float16,fp8,0,0.022422400116920472
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,48,8,128,1,float16,float16,0,0.021902400255203246
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,48,8,128,1,float16,fp8,0,0.02260479927062988
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,48,8,128,1,fp8,fp8,0,0.022724799811840057
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,48,48,128,1,float16,float16,0,0.027369600534439088
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,48,48,128,1,float16,fp8,0,0.02876160144805908
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,48,48,128,1,fp8,fp8,0,0.029104000329971312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,48,1,128,1,float16,float16,0,0.020510399341583253
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,48,2,128,1,float16,fp8,0,0.02129279971122742
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,48,1,128,1,float16,fp8,0,0.021184000372886657
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,48,1,128,1,fp8,fp8,0,0.021279999613761903
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,48,2,128,1,float16,float16,0,0.020787200331687926
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,48,2,128,1,fp8,fp8,0,0.02160640060901642
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,48,4,128,1,float16,float16,0,0.02060000002384186
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,48,8,128,1,float16,float16,0,0.020636799931526183
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,48,4,128,1,fp8,fp8,0,0.0216048002243042
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,48,4,128,1,float16,fp8,0,0.021648000180721282
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,48,48,128,1,float16,float16,0,0.021116800606250763
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,48,8,128,1,float16,fp8,0,0.021404799818992615
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,48,48,128,1,float16,fp8,0,0.021831999719142913
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,48,8,128,1,fp8,fp8,0,0.021687999367713928
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,48,1,128,1,float16,float16,0,0.019998399913311003
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,48,2,128,1,float16,float16,0,0.020172800123691558
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,48,48,128,1,fp8,fp8,0,0.021950399875640868
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,48,1,128,1,fp8,fp8,0,0.02088959962129593
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,48,1,128,1,float16,fp8,0,0.021083199977874757
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,48,2,128,1,float16,fp8,0,0.02089280039072037
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,48,2,128,1,fp8,fp8,0,0.021142399311065672
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,48,4,128,1,float16,float16,0,0.020257599651813507
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,48,4,128,1,float16,fp8,0,0.020812800526618956
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,48,4,128,1,fp8,fp8,0,0.02090719938278198
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,48,8,128,1,float16,float16,0,0.020420800149440765
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,48,8,128,1,float16,fp8,0,0.021542400121688843
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,48,8,128,1,fp8,fp8,0,0.020904000103473663
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,48,48,128,1,float16,float16,0,0.020555199682712556
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,48,48,128,1,float16,fp8,0,0.02122880071401596
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,48,48,128,1,fp8,fp8,0,0.02104319930076599
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,48,1,128,1,float16,float16,0,0.01947679966688156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,48,1,128,1,float16,fp8,0,0.02035840004682541
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,48,1,128,1,fp8,fp8,0,0.020521600544452668
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,48,4,128,1,float16,float16,0,0.019636799395084382
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,48,2,128,1,float16,fp8,0,0.020766399800777435
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,48,2,128,1,float16,float16,0,0.019843199849128725
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,48,8,128,1,float16,float16,0,0.019900800287723543
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,48,4,128,1,fp8,fp8,0,0.02067199945449829
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,48,2,128,1,fp8,fp8,0,0.020732800662517547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,48,4,128,1,float16,fp8,0,0.020491200685501098
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,48,8,128,1,float16,fp8,0,0.021001599729061127
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,48,8,128,1,fp8,fp8,0,0.02046079933643341
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,48,1,128,1,float16,float16,0,0.09048799872398376
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,48,1,128,1,float16,fp8,0,0.09605280160903931
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,48,1,128,1,fp8,fp8,0,0.09621919989585877
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,48,2,128,1,float16,float16,0,0.09609119892120362
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,48,2,128,1,float16,fp8,0,0.10244319438934327
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,48,4,128,1,float16,float16,0,0.11700639724731446
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,48,2,128,1,fp8,fp8,0,0.10237120389938355
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,48,4,128,1,float16,fp8,0,0.12698400020599365
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,48,4,128,1,fp8,fp8,0,0.12669600248336793
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,48,8,128,1,float16,float16,0,0.13733919858932495
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,48,8,128,1,float16,fp8,0,0.14531999826431274
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,48,8,128,1,fp8,fp8,0,0.1461024045944214
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,48,48,128,1,float16,float16,0,0.32559680938720703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,48,48,128,1,float16,fp8,0,0.35646240711212157
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,48,1,128,1,float16,float16,0,0.057704001665115356
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,48,48,128,1,fp8,fp8,0,0.35542879104614256
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,48,1,128,1,float16,fp8,0,0.05340319871902466
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,48,1,128,1,fp8,fp8,0,0.054606401920318605
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,48,2,128,1,float16,float16,0,0.05829439759254455
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,48,2,128,1,float16,fp8,0,0.05581759810447693
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,48,2,128,1,fp8,fp8,0,0.056796801090240476
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,48,4,128,1,float16,float16,0,0.07141600251197815
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,48,4,128,1,float16,fp8,0,0.07157120108604431
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,48,4,128,1,fp8,fp8,0,0.07150400280952454
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,48,8,128,1,float16,float16,0,0.08032960295677186
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,48,8,128,1,float16,fp8,0,0.08095520138740539
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,48,8,128,1,fp8,fp8,0,0.08157280087471008
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,48,48,128,1,float16,float16,0,0.17367839813232422
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,48,48,128,1,float16,fp8,0,0.1859007954597473
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,48,1,128,1,float16,fp8,0,0.034560000896453856
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,48,1,128,1,float16,float16,0,0.0325872004032135
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,48,1,128,1,fp8,fp8,0,0.03472320139408112
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,48,2,128,1,float16,float16,0,0.03312000036239624
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,48,2,128,1,float16,fp8,0,0.03483999967575073
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,48,48,128,1,fp8,fp8,0,0.18557920455932617
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,48,2,128,1,fp8,fp8,0,0.03493280112743378
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,48,4,128,1,float16,float16,0,0.0372624009847641
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,48,4,128,1,float16,fp8,0,0.0402864009141922
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,48,4,128,1,fp8,fp8,0,0.04015040099620819
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,48,8,128,1,float16,float16,0,0.043428799510002135
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,48,8,128,1,float16,fp8,0,0.04467839896678925
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,48,8,128,1,fp8,fp8,0,0.04461599886417389
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,48,48,128,1,float16,float16,0,0.08834879994392394
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,48,48,128,1,float16,fp8,0,0.09461280107498168
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,48,1,128,1,float16,float16,0,0.027500799298286437
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,48,48,128,1,fp8,fp8,0,0.09518240094184875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,48,1,128,1,float16,fp8,0,0.028886398673057555
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,48,1,128,1,fp8,fp8,0,0.028857600688934327
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,48,2,128,1,float16,float16,0,0.02736159861087799
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,48,2,128,1,float16,fp8,0,0.028646400570869444
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,48,8,128,1,float16,fp8,0,0.029731199145317078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,48,2,128,1,fp8,fp8,0,0.028856000304222106
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,48,4,128,1,float16,float16,0,0.027422401309013366
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,48,4,128,1,float16,fp8,0,0.029292801022529603
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,48,4,128,1,fp8,fp8,0,0.028960001468658448
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,48,8,128,1,float16,float16,0,0.028366398811340333
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,48,8,128,1,fp8,fp8,0,0.02969599962234497
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,48,48,128,1,float16,float16,0,0.04936160147190094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,48,48,128,1,float16,fp8,0,0.05435519814491272
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,48,48,128,1,fp8,fp8,0,0.05465120077133179
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,48,2,128,1,fp8,fp8,0,0.02226399928331375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,48,1,128,1,float16,float16,0,0.02143840044736862
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,48,1,128,1,float16,fp8,0,0.022510400414466857
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,48,1,128,1,fp8,fp8,0,0.022543999552726745
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,48,2,128,1,float16,float16,0,0.021542400121688843
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,48,2,128,1,float16,fp8,0,0.023129600286483764
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,48,4,128,1,float16,float16,0,0.021609599888324737
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,48,4,128,1,float16,fp8,0,0.02240000069141388
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,48,4,128,1,fp8,fp8,0,0.022804799675941467
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,48,8,128,1,float16,float16,0,0.021937599778175353
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,48,8,128,1,float16,fp8,0,0.02271520048379898
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,48,8,128,1,fp8,fp8,0,0.02263839989900589
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,48,48,128,1,float16,float16,0,0.03131040036678314
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,48,48,128,1,float16,fp8,0,0.03373759984970093
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,48,48,128,1,fp8,fp8,0,0.033846399188041686
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,48,1,128,1,float16,float16,0,0.020678399503231047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,48,1,128,1,float16,fp8,0,0.021331200003623964
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,48,1,128,1,fp8,fp8,0,0.021614399552345277
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,48,2,128,1,float16,float16,0,0.020503999292850496
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,48,2,128,1,float16,fp8,0,0.021462400257587434
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,48,2,128,1,fp8,fp8,0,0.02179519981145859
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,48,4,128,1,float16,float16,0,0.02052319943904877
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,48,4,128,1,float16,fp8,0,0.021704000234603883
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,48,4,128,1,fp8,fp8,0,0.021532799303531646
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,48,8,128,1,float16,float16,0,0.020695999264717102
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,48,8,128,1,float16,fp8,0,0.021489599347114564
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,48,8,128,1,fp8,fp8,0,0.021595199406147004
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,48,48,128,1,float16,float16,0,0.024905599653720856
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,48,48,128,1,float16,fp8,0,0.026516801118850707
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,48,48,128,1,fp8,fp8,0,0.02616479992866516
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,48,1,128,1,float16,float16,0,0.0197952002286911
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,48,1,128,1,float16,fp8,0,0.020916800200939178
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,48,1,128,1,fp8,fp8,0,0.020750400424003602
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,48,2,128,1,float16,float16,0,0.01988479942083359
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,48,2,128,1,float16,fp8,0,0.021104000508785248
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,48,4,128,1,float16,fp8,0,0.02118239998817444
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,48,4,128,1,float16,float16,0,0.02008640021085739
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,48,2,128,1,fp8,fp8,0,0.020815999805927278
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,48,4,128,1,fp8,fp8,0,0.021033599972724915
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,48,8,128,1,fp8,fp8,0,0.021049599349498748
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,48,8,128,1,float16,fp8,0,0.02096160054206848
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,48,8,128,1,float16,float16,0,0.020275199413299562
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,48,48,128,1,float16,float16,0,0.02072799950838089
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,48,48,128,1,float16,fp8,0,0.02122559994459152
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,48,48,128,1,fp8,fp8,0,0.021411199867725373
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,48,1,128,1,float16,float16,0,0.01964640021324158
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,48,1,128,1,float16,fp8,0,0.02053920030593872
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,48,1,128,1,fp8,fp8,0,0.020334400236606598
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,48,2,128,1,float16,float16,0,0.019648000597953796
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,48,2,128,1,float16,fp8,0,0.02072640061378479
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,48,2,128,1,fp8,fp8,0,0.02061759978532791
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,48,4,128,1,float16,float16,0,0.019875200092792512
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,48,4,128,1,float16,fp8,0,0.020502400398254395
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,48,4,128,1,fp8,fp8,0,0.020948800444602966
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,48,8,128,1,float16,float16,0,0.019812799990177155
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,48,8,128,1,float16,fp8,0,0.020411199331283568
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,48,8,128,1,fp8,fp8,0,0.020747199654579163
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,48,48,128,1,float16,float16,0,0.02061759978532791
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,48,48,128,1,float16,fp8,0,0.02107200026512146
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,48,48,128,1,fp8,fp8,0,0.020948800444602966
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,48,1,128,1,float16,float16,0,0.019575999677181245
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,48,1,128,1,float16,fp8,0,0.02035039961338043
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,48,1,128,1,fp8,fp8,0,0.02040960043668747
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,48,2,128,1,float16,float16,0,0.019531199336051942
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,48,2,128,1,float16,fp8,0,0.020155200362205507
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,48,2,128,1,fp8,fp8,0,0.0205935999751091
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,48,4,128,1,float16,float16,0,0.019652800261974336
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,48,4,128,1,float16,fp8,0,0.020190399885177613
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,48,4,128,1,fp8,fp8,0,0.020343999564647674
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,48,8,128,1,float16,float16,0,0.019441600143909454
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,48,8,128,1,fp8,fp8,0,0.0202224001288414
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,48,8,128,1,float16,fp8,0,0.020505599677562714
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,48,1,128,1,float16,float16,0,0.029577600955963134
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,48,1,128,1,float16,fp8,0,0.031457599997520444
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,48,1,128,1,fp8,fp8,0,0.031523200869560244
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,48,2,128,1,float16,float16,0,0.03738400042057037
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,48,2,128,1,float16,fp8,0,0.0402319997549057
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,48,2,128,1,fp8,fp8,0,0.04046559929847717
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,48,4,128,1,float16,float16,0,0.05204799771308899
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,48,4,128,1,float16,fp8,0,0.0582319974899292
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,48,4,128,1,fp8,fp8,0,0.05826879739761352
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,48,8,128,1,float16,float16,0,0.08031839728355408
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,48,8,128,1,float16,fp8,0,0.09397760033607483
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,48,8,128,1,fp8,fp8,0,0.09382399916648865
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,48,48,128,1,float16,float16,0,0.19298399686813356
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,48,48,128,1,float16,fp8,0,0.23750240802764894
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,48,1,128,1,float16,float16,0,0.022758400440216063
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,48,48,128,1,fp8,fp8,0,0.23659999370574952
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,48,1,128,1,float16,fp8,0,0.023532800376415253
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,48,1,128,1,fp8,fp8,0,0.02372799962759018
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,48,2,128,1,float16,float16,0,0.026660799980163574
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,48,2,128,1,float16,fp8,0,0.02821600139141083
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,48,2,128,1,fp8,fp8,0,0.028195199370384217
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,48,4,128,1,float16,float16,0,0.0343392014503479
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,48,4,128,1,float16,fp8,0,0.03751200139522552
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,48,4,128,1,fp8,fp8,0,0.037339198589324954
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,48,8,128,1,float16,float16,0,0.048681598901748654
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,48,8,128,1,float16,fp8,0,0.05541279911994934
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,48,8,128,1,fp8,fp8,0,0.05535680055618286
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,48,48,128,1,float16,float16,0,0.10717120170593261
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,48,48,128,1,float16,fp8,0,0.12688640356063843
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,48,48,128,1,fp8,fp8,0,0.12826080322265626
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,48,1,128,1,float16,float16,0,0.021031999588012697
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,48,1,128,1,float16,fp8,0,0.021652799844741822
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,48,1,128,1,fp8,fp8,0,0.021507200598716737
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,48,2,128,1,float16,float16,0,0.021425600349903106
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,48,2,128,1,float16,fp8,0,0.022439999878406523
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,48,2,128,1,fp8,fp8,0,0.02197919934988022
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,48,4,128,1,float16,float16,0,0.025169599056243896
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,48,4,128,1,float16,fp8,0,0.026955199241638184
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,48,4,128,1,fp8,fp8,0,0.026790401339530943
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,48,8,128,1,float16,float16,0,0.032451200485229495
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,48,8,128,1,float16,fp8,0,0.03607679903507233
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,48,8,128,1,fp8,fp8,0,0.03597440123558045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,48,48,128,1,float16,float16,0,0.06266080141067505
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,48,48,128,1,float16,fp8,0,0.071806401014328
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,48,48,128,1,fp8,fp8,0,0.07256320118904114
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,48,1,128,1,float16,fp8,0,0.020948800444602966
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,48,2,128,1,float16,float16,0,0.020638400316238405
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,48,1,128,1,float16,float16,0,0.02003680020570755
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,48,1,128,1,fp8,fp8,0,0.021217599511146545
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,48,2,128,1,float16,fp8,0,0.021155199408531188
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,48,4,128,1,float16,float16,0,0.021011200547218323
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,48,2,128,1,fp8,fp8,0,0.02147040069103241
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,48,4,128,1,float16,fp8,0,0.02147520035505295
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,48,4,128,1,fp8,fp8,0,0.02176479995250702
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,48,8,128,1,float16,float16,0,0.024743999540805816
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,48,8,128,1,float16,fp8,0,0.026254400610923767
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,48,8,128,1,fp8,fp8,0,0.026001599431037904
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,48,48,128,1,float16,float16,0,0.039611199498176576
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,48,48,128,1,float16,fp8,0,0.04398559927940369
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,48,48,128,1,fp8,fp8,0,0.043992000818252566
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,48,1,128,1,float16,float16,0,0.01942719966173172
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,48,1,128,1,float16,fp8,0,0.020052799582481386
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,48,1,128,1,fp8,fp8,0,0.020252799987792967
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,48,2,128,1,float16,float16,0,0.01927199959754944
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,48,2,128,1,float16,fp8,0,0.02057439982891083
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,48,2,128,1,fp8,fp8,0,0.020528000593185425
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,48,4,128,1,float16,float16,0,0.019662399590015412
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,48,4,128,1,float16,fp8,0,0.020971199870109557
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,48,4,128,1,fp8,fp8,0,0.020839999616146087
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,48,8,128,1,float16,float16,0,0.020057600736618043
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,48,8,128,1,float16,fp8,0,0.020947200059890748
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,48,8,128,1,fp8,fp8,0,0.021065600216388702
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,48,48,128,1,float16,float16,0,0.02799200117588043
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,48,48,128,1,float16,fp8,0,0.030063998699188233
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,48,48,128,1,fp8,fp8,0,0.0305184006690979
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,48,1,128,1,float16,float16,0,0.019467200338840484
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,48,1,128,1,float16,fp8,0,0.020161600410938264
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,48,1,128,1,fp8,fp8,0,0.01992799937725067
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,48,2,128,1,float16,float16,0,0.019380800426006317
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,48,2,128,1,float16,fp8,0,0.02008640021085739
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,48,2,128,1,fp8,fp8,0,0.020019200444221497
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,48,4,128,1,float16,float16,0,0.019592000544071196
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,48,4,128,1,float16,fp8,0,0.020732800662517547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,48,4,128,1,fp8,fp8,0,0.020351999998092653
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,48,8,128,1,float16,float16,0,0.019942399859428406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,48,8,128,1,float16,fp8,0,0.020604799687862396
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,48,8,128,1,fp8,fp8,0,0.020721599459648132
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,48,48,128,1,float16,float16,0,0.023580799996852874
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,48,1,128,1,float16,float16,0,0.019023999571800232
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,48,1,128,1,float16,fp8,0,0.01969760060310364
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,48,48,128,1,float16,fp8,0,0.024956800043582916
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,48,1,128,1,fp8,fp8,0,0.01960480064153671
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,48,48,128,1,fp8,fp8,0,0.025409600138664244
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,48,2,128,1,float16,float16,0,0.01915999948978424
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,48,2,128,1,float16,fp8,0,0.01993599981069565
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,48,2,128,1,fp8,fp8,0,0.01966720074415207
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,48,4,128,1,float16,float16,0,0.019393600523471832
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,48,4,128,1,float16,fp8,0,0.019969600439071655
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,48,4,128,1,fp8,fp8,0,0.01995840072631836
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,48,8,128,1,float16,float16,0,0.019395199418067933
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,48,8,128,1,float16,fp8,0,0.020446400344371795
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,48,8,128,1,fp8,fp8,0,0.020294399559497835
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,48,48,128,1,float16,fp8,0,0.020587199926376344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,48,48,128,1,float16,float16,0,0.01977919936180115
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,48,48,128,1,fp8,fp8,0,0.02083200067281723
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,48,1,128,1,float16,float16,0,0.017287999391555786
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,48,1,128,1,float16,fp8,0,0.01804320067167282
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,48,1,128,1,fp8,fp8,0,0.018094399571418764
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,48,2,128,1,float16,float16,0,0.01894879937171936
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,48,2,128,1,float16,fp8,0,0.019788800179958342
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,48,2,128,1,fp8,fp8,0,0.01969760060310364
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,48,4,128,1,float16,fp8,0,0.02008800059556961
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,48,4,128,1,float16,float16,0,0.019236800074577332
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,48,4,128,1,fp8,fp8,0,0.019939200580120088
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,48,8,128,1,float16,float16,0,0.019099199771881105
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,48,8,128,1,float16,fp8,0,0.020137600600719452
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,48,8,128,1,fp8,fp8,0,0.020057600736618043
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,48,48,128,1,float16,float16,0,0.01947840005159378
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,48,48,128,1,float16,fp8,0,0.020265600085258482
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,48,48,128,1,fp8,fp8,0,0.020411199331283568
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,48,1,128,1,float16,float16,0,0.01653439998626709
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,48,1,128,1,float16,fp8,0,0.017215999960899352
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,48,1,128,1,fp8,fp8,0,0.017110399901866913
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,48,2,128,1,float16,float16,0,0.01687040030956268
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,48,2,128,1,fp8,fp8,0,0.017734399437904357
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,48,2,128,1,float16,fp8,0,0.017795200645923614
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,48,4,128,1,float16,fp8,0,0.019780799746513367
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,48,4,128,1,float16,float16,0,0.018724800646305086
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,48,4,128,1,fp8,fp8,0,0.019811199605464937
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,48,8,128,1,float16,float16,0,0.01881919950246811
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,48,8,128,1,float16,fp8,0,0.01960480064153671
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,48,8,128,1,fp8,fp8,0,0.019739200174808503
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,40,1,128,1,float16,fp8,0,31.711306762695312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,40,1,128,1,fp8,fp8,0,32.050421142578124
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,40,2,128,1,float16,fp8,0,32.24409484863281
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,40,2,128,1,fp8,fp8,0,31.978668212890625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,40,1,128,1,float16,float16,0,38.8212158203125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,40,2,128,1,float16,float16,0,39.32286682128906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,40,4,128,1,float16,float16,0,39.41449890136719
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,40,4,128,1,float16,fp8,0,32.549050903320314
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,40,40,128,1,float16,fp8,0,18.817158508300782
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,40,40,128,1,fp8,fp8,0,18.534365844726562
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,40,40,128,1,float16,float16,0,21.767428588867187
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,40,1,128,1,float16,float16,0,19.903578186035155
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,40,4,128,1,fp8,fp8,0,32.530975341796875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,40,8,128,1,fp8,fp8,0,33.150537109375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,40,8,128,1,float16,fp8,0,33.34461364746094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,40,8,128,1,float16,float16,0,39.81842041015625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,40,1,128,1,fp8,fp8,0,16.011384582519533
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,40,1,128,1,float16,fp8,0,16.52042236328125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,40,2,128,1,float16,fp8,0,16.241482543945313
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,40,2,128,1,float16,float16,0,20.018588256835937
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,40,2,128,1,fp8,fp8,0,16.56269226074219
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,40,4,128,1,float16,fp8,0,16.85828857421875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,40,4,128,1,float16,float16,0,20.002474975585937
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,40,4,128,1,fp8,fp8,0,16.38123779296875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,40,40,128,1,float16,fp8,0,9.262725067138671
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,40,40,128,1,float16,float16,0,10.919439697265625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,40,8,128,1,float16,fp8,0,16.813670349121093
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,40,40,128,1,fp8,fp8,0,9.317262268066406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,40,8,128,1,fp8,fp8,0,16.692240905761718
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,40,8,128,1,float16,float16,0,19.955177307128906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,40,1,128,1,float16,float16,0,9.331948852539062
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,40,1,128,1,float16,fp8,0,7.985008239746094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,40,1,128,1,fp8,fp8,0,7.938219451904297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,40,2,128,1,float16,float16,0,9.651830291748047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,40,2,128,1,float16,fp8,0,8.115980529785157
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,40,2,128,1,fp8,fp8,0,8.058785247802735
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,40,4,128,1,float16,fp8,0,8.083822631835938
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,40,4,128,1,float16,float16,0,10.080159759521484
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,40,4,128,1,fp8,fp8,0,8.12193603515625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,40,8,128,1,float16,float16,0,10.046259307861328
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,40,40,128,1,float16,float16,0,5.448025512695312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,40,8,128,1,float16,fp8,0,8.18145294189453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,40,40,128,1,float16,fp8,0,4.662435150146484
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,40,40,128,1,fp8,fp8,0,4.659934234619141
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,40,8,128,1,fp8,fp8,0,8.335855865478516
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,40,1,128,1,float16,fp8,0,3.9497665405273437
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,40,1,128,1,float16,float16,0,4.7257038116455075
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,40,2,128,1,float16,fp8,0,3.9687599182128905
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,40,1,128,1,fp8,fp8,0,4.322110366821289
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,40,2,128,1,fp8,fp8,0,4.012212753295898
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,40,2,128,1,float16,float16,0,5.088020706176758
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,40,4,128,1,float16,fp8,0,4.112031936645508
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,40,4,128,1,fp8,fp8,0,4.030196762084961
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,40,4,128,1,float16,float16,0,4.765564727783203
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,40,8,128,1,float16,float16,0,4.936147308349609
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,40,8,128,1,float16,fp8,0,4.111046218872071
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,40,8,128,1,fp8,fp8,0,4.1129600524902346
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,40,1,128,1,float16,fp8,0,18.560816955566406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,40,1,128,1,fp8,fp8,0,18.229826354980467
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,40,2,128,1,float16,fp8,0,18.446867370605467
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,40,2,128,1,fp8,fp8,0,18.470986938476564
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,40,1,128,1,float16,float16,0,22.177442932128905
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,40,4,128,1,float16,fp8,0,18.576052856445312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,40,2,128,1,float16,float16,0,22.096786499023438
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,40,4,128,1,float16,float16,0,22.157606506347655
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,40,40,128,1,float16,fp8,0,11.113934326171876
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,40,40,128,1,float16,float16,0,13.454666137695312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,40,40,128,1,fp8,fp8,0,11.063361358642577
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,40,1,128,1,float16,float16,0,11.047891235351562
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,40,4,128,1,fp8,fp8,0,19.208050537109376
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,40,8,128,1,float16,fp8,0,19.333798217773438
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,40,8,128,1,fp8,fp8,0,19.601042175292967
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,40,8,128,1,float16,float16,0,23.104835510253906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,40,1,128,1,float16,fp8,0,9.411151885986328
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,40,1,128,1,fp8,fp8,0,9.175041961669923
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,40,2,128,1,float16,float16,0,10.882924652099609
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,40,2,128,1,float16,fp8,0,9.149046325683594
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,40,2,128,1,fp8,fp8,0,9.334166717529296
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,40,4,128,1,float16,fp8,0,9.314995574951173
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,40,4,128,1,float16,float16,0,11.201280212402343
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,40,4,128,1,fp8,fp8,0,9.229408264160156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,40,40,128,1,float16,float16,0,6.282969665527344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,40,40,128,1,float16,fp8,0,5.649537658691406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,40,8,128,1,float16,fp8,0,9.554641723632812
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,40,8,128,1,float16,float16,0,11.478984069824218
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,40,8,128,1,fp8,fp8,0,9.455944061279297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,40,40,128,1,fp8,fp8,0,5.557257461547851
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,40,1,128,1,float16,float16,0,5.486865615844726
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,40,1,128,1,float16,fp8,0,4.511294555664063
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,40,1,128,1,fp8,fp8,0,4.542355346679687
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,40,2,128,1,float16,float16,0,5.568759918212891
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,40,2,128,1,float16,fp8,0,4.6345569610595705
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,40,2,128,1,fp8,fp8,0,4.667945480346679
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,40,4,128,1,float16,fp8,0,4.634175872802734
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,40,4,128,1,fp8,fp8,0,4.677870559692383
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,40,4,128,1,float16,float16,0,5.827249526977539
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,40,8,128,1,float16,float16,0,5.487648010253906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,40,40,128,1,float16,float16,0,3.17861270904541
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,40,8,128,1,float16,fp8,0,4.705611038208008
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,40,40,128,1,float16,fp8,0,2.798846435546875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,40,40,128,1,fp8,fp8,0,2.7992319107055663
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,40,8,128,1,fp8,fp8,0,4.89399528503418
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,40,1,128,1,float16,float16,0,2.566414451599121
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,40,1,128,1,float16,fp8,0,2.360355186462402
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,40,1,128,1,fp8,fp8,0,2.459132766723633
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,40,2,128,1,float16,float16,0,2.387710380554199
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,40,2,128,1,float16,fp8,0,2.2761615753173827
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,40,2,128,1,fp8,fp8,0,2.2802255630493162
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,40,4,128,1,float16,float16,0,2.6489744186401367
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,40,4,128,1,float16,fp8,0,2.5334751129150392
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,40,4,128,1,fp8,fp8,0,2.3054880142211913
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,40,8,128,1,float16,float16,0,2.679614448547363
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,40,8,128,1,float16,fp8,0,2.45914249420166
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,40,8,128,1,fp8,fp8,0,2.3562559127807616
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,40,1,128,1,fp8,fp8,0,12.76360626220703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,40,1,128,1,float16,fp8,0,13.08072509765625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,40,2,128,1,float16,fp8,0,13.012939453125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,40,2,128,1,fp8,fp8,0,12.95018310546875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,40,1,128,1,float16,float16,0,15.212721252441407
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,40,2,128,1,float16,float16,0,15.314083862304688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,40,4,128,1,float16,fp8,0,13.149249267578124
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,40,4,128,1,float16,float16,0,15.667707824707032
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,40,40,128,1,float16,float16,0,9.453171539306641
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,40,40,128,1,float16,fp8,0,8.107202911376953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,40,1,128,1,float16,float16,0,7.76553955078125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,40,40,128,1,fp8,fp8,0,8.039315032958985
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,40,4,128,1,fp8,fp8,0,13.149667358398437
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,40,8,128,1,float16,fp8,0,13.420744323730469
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,40,8,128,1,fp8,fp8,0,13.391351318359375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,40,8,128,1,float16,float16,0,16.50174560546875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,40,1,128,1,fp8,fp8,0,6.357080078125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,40,1,128,1,float16,fp8,0,6.505292510986328
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,40,2,128,1,float16,fp8,0,6.453936004638672
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,40,2,128,1,float16,float16,0,7.895302581787109
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,40,2,128,1,fp8,fp8,0,6.445932769775391
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,40,4,128,1,float16,fp8,0,6.488311767578125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,40,4,128,1,float16,float16,0,7.975555419921875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,40,4,128,1,fp8,fp8,0,6.691793823242188
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,40,40,128,1,float16,fp8,0,4.09869270324707
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,40,40,128,1,float16,float16,0,4.678699111938476
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,40,8,128,1,float16,fp8,0,6.759130859375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,40,8,128,1,fp8,fp8,0,6.833866882324219
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,40,8,128,1,float16,float16,0,8.041255950927734
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,40,40,128,1,fp8,fp8,0,4.095119857788086
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,40,1,128,1,float16,float16,0,3.7958911895751952
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,40,1,128,1,float16,fp8,0,3.1982336044311523
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,40,1,128,1,fp8,fp8,0,3.1713647842407227
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,40,2,128,1,float16,float16,0,3.610316848754883
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,40,2,128,1,float16,fp8,0,3.2150768280029296
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,40,2,128,1,fp8,fp8,0,3.3527057647705076
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,40,4,128,1,float16,fp8,0,3.2458511352539063
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,40,4,128,1,float16,float16,0,3.764334487915039
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,40,4,128,1,fp8,fp8,0,3.4237087249755858
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,40,8,128,1,float16,float16,0,3.880411148071289
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,40,40,128,1,float16,float16,0,2.2288320541381834
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,40,8,128,1,float16,fp8,0,3.3266624450683593
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,40,40,128,1,float16,fp8,0,2.0709808349609373
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,40,1,128,1,float16,float16,0,1.7916608810424806
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,40,40,128,1,fp8,fp8,0,2.067452812194824
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,40,1,128,1,float16,fp8,0,1.5946831703186035
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,40,8,128,1,fp8,fp8,0,3.4480510711669923
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,40,1,128,1,fp8,fp8,0,1.7034015655517578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,40,2,128,1,float16,float16,0,1.7744192123413085
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,40,2,128,1,float16,fp8,0,1.6101600646972656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,40,2,128,1,fp8,fp8,0,1.6030624389648438
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,40,4,128,1,float16,float16,0,1.8121919631958008
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,40,4,128,1,float16,fp8,0,1.6327247619628906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,40,4,128,1,fp8,fp8,0,1.6361648559570312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,40,8,128,1,float16,float16,0,1.840239906311035
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,40,8,128,1,float16,fp8,0,1.7032655715942382
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,40,8,128,1,fp8,fp8,0,1.7511503219604492
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,40,1,128,1,float16,fp8,0,16.767132568359376
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,40,1,128,1,fp8,fp8,0,16.806559753417968
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,40,2,128,1,float16,fp8,0,16.682293701171876
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,40,2,128,1,fp8,fp8,0,16.82190399169922
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,40,4,128,1,float16,fp8,0,17.21400604248047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,40,1,128,1,float16,float16,0,19.755778503417968
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,40,2,128,1,float16,float16,0,19.763999938964844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,40,4,128,1,float16,float16,0,20.723953247070312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,40,40,128,1,float16,fp8,0,11.004827117919922
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,40,40,128,1,fp8,fp8,0,11.012467193603516
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,40,40,128,1,float16,float16,0,12.765806579589844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,40,1,128,1,float16,float16,0,10.202728271484375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,40,4,128,1,fp8,fp8,0,17.59633026123047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,40,8,128,1,float16,fp8,0,17.97747039794922
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,40,8,128,1,fp8,fp8,0,18.061856079101563
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,40,8,128,1,float16,float16,0,21.314865112304688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,40,1,128,1,float16,fp8,0,8.321148681640626
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,40,1,128,1,fp8,fp8,0,8.263777923583984
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,40,2,128,1,float16,fp8,0,8.536670684814453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,40,2,128,1,float16,float16,0,10.465796661376952
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,40,2,128,1,fp8,fp8,0,8.5406494140625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,40,4,128,1,float16,fp8,0,8.606974029541016
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,40,4,128,1,float16,float16,0,10.470887756347656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,40,4,128,1,fp8,fp8,0,8.534385681152344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,40,40,128,1,float16,float16,0,6.223559951782226
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,40,40,128,1,float16,fp8,0,5.493105697631836
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,40,8,128,1,float16,fp8,0,8.79880142211914
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,40,8,128,1,float16,float16,0,10.48017578125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,40,8,128,1,fp8,fp8,0,8.948989105224609
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,40,40,128,1,fp8,fp8,0,5.522536087036133
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,40,1,128,1,float16,float16,0,4.814097595214844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,40,1,128,1,float16,fp8,0,4.134681701660156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,40,1,128,1,fp8,fp8,0,4.243852615356445
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,40,2,128,1,float16,float16,0,4.876518249511719
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,40,2,128,1,fp8,fp8,0,4.246057510375977
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,40,2,128,1,float16,fp8,0,4.397256088256836
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,40,4,128,1,float16,fp8,0,4.327585601806641
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,40,4,128,1,float16,float16,0,5.058407974243164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,40,4,128,1,fp8,fp8,0,4.253902435302734
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,40,40,128,1,float16,float16,0,3.03995361328125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,40,1,128,1,float16,float16,0,2.3020656585693358
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,40,40,128,1,float16,fp8,0,2.776350402832031
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,40,8,128,1,float16,float16,0,5.101684951782227
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,40,8,128,1,float16,fp8,0,4.453847885131836
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,40,40,128,1,fp8,fp8,0,2.9017839431762695
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,40,8,128,1,fp8,fp8,0,4.448297500610352
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,40,1,128,1,float16,fp8,0,2.0733312606811523
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,40,1,128,1,fp8,fp8,0,2.142076873779297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,40,2,128,1,float16,float16,0,2.371780776977539
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,40,2,128,1,float16,fp8,0,2.103158378601074
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,40,2,128,1,fp8,fp8,0,2.172455978393555
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,40,4,128,1,float16,fp8,0,2.1287439346313475
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,40,4,128,1,float16,float16,0,2.3961135864257814
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,40,4,128,1,fp8,fp8,0,2.13690242767334
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,40,8,128,1,float16,float16,0,2.31909122467041
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,40,8,128,1,float16,fp8,0,2.1989696502685545
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,40,40,128,1,float16,float16,0,1.4827664375305176
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,40,40,128,1,float16,fp8,0,1.4107855796813964
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,40,40,128,1,fp8,fp8,0,1.4010208129882813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,40,1,128,1,float16,float16,0,1.1731023788452148
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,40,1,128,1,float16,fp8,0,1.060537624359131
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,40,8,128,1,fp8,fp8,0,2.304617691040039
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,40,1,128,1,fp8,fp8,0,1.1142224311828612
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,40,2,128,1,float16,float16,0,1.1281439781188964
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,40,2,128,1,float16,fp8,0,1.1037311553955078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,40,2,128,1,fp8,fp8,0,1.0596816062927246
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,40,4,128,1,float16,float16,0,1.1491663932800293
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,40,4,128,1,float16,fp8,0,1.1071536064147949
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,40,4,128,1,fp8,fp8,0,1.0792880058288574
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,40,8,128,1,float16,float16,0,1.1857168197631835
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,40,8,128,1,float16,fp8,0,1.123136043548584
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,40,8,128,1,fp8,fp8,0,1.1595248222351073
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,40,1,128,1,float16,fp8,0,9.644905853271485
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,40,1,128,1,fp8,fp8,0,9.597876739501952
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,40,2,128,1,float16,fp8,0,9.89305419921875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,40,2,128,1,fp8,fp8,0,9.688164520263673
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,40,1,128,1,float16,float16,0,11.193819427490235
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,40,4,128,1,float16,fp8,0,9.933236694335937
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,40,2,128,1,float16,float16,0,11.502611541748047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,40,4,128,1,float16,float16,0,11.669261169433593
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,40,1,128,1,float16,float16,0,5.578351974487305
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,40,40,128,1,float16,fp8,0,6.873331451416016
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,40,40,128,1,float16,float16,0,7.553521728515625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,40,40,128,1,fp8,fp8,0,6.909651184082032
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,40,4,128,1,fp8,fp8,0,9.954905700683593
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,40,8,128,1,float16,fp8,0,10.4633056640625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,40,8,128,1,fp8,fp8,0,10.353118133544921
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,40,8,128,1,float16,float16,0,12.22638397216797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,40,1,128,1,fp8,fp8,0,4.768635177612305
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,40,1,128,1,float16,fp8,0,4.893353652954102
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,40,2,128,1,float16,fp8,0,4.87597770690918
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,40,2,128,1,float16,float16,0,5.629289627075195
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,40,2,128,1,fp8,fp8,0,4.878656005859375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,40,4,128,1,float16,fp8,0,4.971299362182617
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,40,4,128,1,float16,float16,0,5.878750228881836
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,40,4,128,1,fp8,fp8,0,5.00296630859375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,40,40,128,1,float16,fp8,0,3.456897735595703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,40,40,128,1,float16,float16,0,3.8538654327392576
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,40,8,128,1,float16,fp8,0,5.162486267089844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,40,8,128,1,float16,float16,0,5.977190399169922
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,40,8,128,1,fp8,fp8,0,5.165577697753906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,40,1,128,1,float16,fp8,0,2.400412750244141
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,40,1,128,1,float16,float16,0,2.7353328704833983
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,40,40,128,1,fp8,fp8,0,3.454244613647461
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,40,1,128,1,fp8,fp8,0,2.398072052001953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,40,2,128,1,float16,fp8,0,2.4347904205322264
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,40,2,128,1,float16,float16,0,2.694071960449219
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,40,2,128,1,fp8,fp8,0,2.534763145446777
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,40,4,128,1,float16,fp8,0,2.493550491333008
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,40,4,128,1,float16,float16,0,2.804280090332031
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,40,4,128,1,fp8,fp8,0,2.615510368347168
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,40,8,128,1,float16,float16,0,2.836777687072754
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,40,8,128,1,float16,fp8,0,2.5954992294311525
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,40,40,128,1,float16,float16,0,1.994910430908203
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,40,40,128,1,float16,fp8,0,1.745257568359375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,40,1,128,1,float16,float16,0,1.3561280250549317
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,40,1,128,1,float16,fp8,0,1.271027183532715
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,40,8,128,1,fp8,fp8,0,2.5994144439697267
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,40,40,128,1,fp8,fp8,0,1.747323226928711
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,40,1,128,1,fp8,fp8,0,1.2962592124938965
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,40,2,128,1,float16,float16,0,1.3043199539184571
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,40,2,128,1,float16,fp8,0,1.3100671768188477
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,40,2,128,1,fp8,fp8,0,1.2308863639831542
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,40,4,128,1,float16,float16,0,1.343169593811035
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,40,4,128,1,float16,fp8,0,1.267411231994629
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,40,4,128,1,fp8,fp8,0,1.2591183662414551
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,40,8,128,1,float16,float16,0,1.3915295600891113
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,40,40,128,1,float16,float16,0,0.9664976119995117
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,40,8,128,1,float16,fp8,0,1.3093791961669923
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,40,40,128,1,float16,fp8,0,0.8863344192504883
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,40,1,128,1,float16,fp8,0,0.6184447765350342
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,40,8,128,1,fp8,fp8,0,1.3466959953308106
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,40,1,128,1,fp8,fp8,0,0.6286831855773926
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,40,1,128,1,float16,float16,0,0.6536032199859619
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,40,40,128,1,fp8,fp8,0,0.9181568145751953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,40,2,128,1,float16,float16,0,0.6624752044677734
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,40,2,128,1,float16,fp8,0,0.6274303913116455
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,40,2,128,1,fp8,fp8,0,0.6309936046600342
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,40,4,128,1,float16,float16,0,0.6702191829681396
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,40,4,128,1,float16,fp8,0,0.649183988571167
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,40,4,128,1,fp8,fp8,0,0.6425727844238281
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,40,8,128,1,float16,float16,0,0.7067920207977295
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,40,8,128,1,float16,fp8,0,0.6681519985198975
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,40,8,128,1,fp8,fp8,0,0.6671599864959716
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,40,1,128,1,float16,fp8,0,9.029154968261718
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,40,1,128,1,fp8,fp8,0,8.980022430419922
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,40,2,128,1,float16,fp8,0,9.225790405273438
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,40,1,128,1,float16,float16,0,10.48010711669922
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,40,2,128,1,fp8,fp8,0,9.119348907470703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,40,2,128,1,float16,float16,0,10.846465301513671
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,40,4,128,1,float16,float16,0,10.839969635009766
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,40,4,128,1,float16,fp8,0,9.451484680175781
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,40,40,128,1,float16,float16,0,7.7801567077636715
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,40,4,128,1,fp8,fp8,0,9.481654357910156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,40,40,128,1,float16,fp8,0,7.3488525390625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,40,40,128,1,fp8,fp8,0,7.318753814697265
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,40,8,128,1,float16,fp8,0,9.995526123046876
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,40,8,128,1,fp8,fp8,0,10.00350570678711
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,40,8,128,1,float16,float16,0,11.501439666748047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,40,1,128,1,float16,float16,0,5.177094268798828
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,40,1,128,1,float16,fp8,0,4.503891372680664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,40,1,128,1,fp8,fp8,0,4.518150329589844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,40,2,128,1,float16,float16,0,5.376844787597657
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,40,2,128,1,float16,fp8,0,4.599332809448242
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,40,2,128,1,fp8,fp8,0,4.597910308837891
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,40,4,128,1,float16,fp8,0,4.753208160400391
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,40,4,128,1,float16,float16,0,5.385377502441406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,40,4,128,1,fp8,fp8,0,4.851273727416992
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,40,40,128,1,float16,float16,0,3.854492950439453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,40,8,128,1,float16,fp8,0,5.006830215454102
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,40,8,128,1,float16,float16,0,5.578222274780273
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,40,1,128,1,float16,float16,0,2.51844482421875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,40,40,128,1,float16,fp8,0,3.648166275024414
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,40,8,128,1,fp8,fp8,0,5.080875015258789
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,40,1,128,1,float16,fp8,0,2.2640031814575194
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,40,40,128,1,fp8,fp8,0,3.663908767700195
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,40,1,128,1,fp8,fp8,0,2.2850624084472657
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,40,2,128,1,float16,float16,0,2.5394384384155275
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,40,2,128,1,float16,fp8,0,2.3781360626220702
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,40,2,128,1,fp8,fp8,0,2.4197839736938476
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,40,4,128,1,float16,float16,0,2.6380336761474608
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,40,4,128,1,float16,fp8,0,2.378241539001465
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,40,4,128,1,fp8,fp8,0,2.3825536727905274
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,40,8,128,1,float16,float16,0,2.7462207794189455
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,40,8,128,1,float16,fp8,0,2.5196352005004883
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,40,1,128,1,float16,float16,0,1.2014431953430176
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,40,40,128,1,float16,float16,0,1.922488021850586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,40,8,128,1,fp8,fp8,0,2.5734560012817385
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,40,40,128,1,float16,fp8,0,1.848414421081543
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,40,1,128,1,float16,fp8,0,1.147753620147705
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,40,40,128,1,fp8,fp8,0,1.971887969970703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,40,1,128,1,fp8,fp8,0,1.210041618347168
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,40,2,128,1,float16,float16,0,1.214961624145508
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,40,2,128,1,float16,fp8,0,1.2160320281982422
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,40,4,128,1,float16,float16,0,1.2598112106323243
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,40,2,128,1,fp8,fp8,0,1.1663167953491211
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,40,4,128,1,float16,fp8,0,1.2038800239562988
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,40,4,128,1,fp8,fp8,0,1.2041263580322266
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,40,8,128,1,float16,float16,0,1.3412303924560547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,40,8,128,1,float16,fp8,0,1.2833056449890137
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,40,40,128,1,float16,float16,0,0.9839183807373046
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,40,8,128,1,fp8,fp8,0,1.3046496391296387
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,40,40,128,1,float16,fp8,0,0.9764063835144043
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,40,1,128,1,float16,float16,0,0.6062047958374024
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,40,40,128,1,fp8,fp8,0,0.9356975555419922
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,40,1,128,1,float16,fp8,0,0.5873583793640137
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,40,1,128,1,fp8,fp8,0,0.6351151943206788
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,40,2,128,1,float16,float16,0,0.6255712032318115
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,40,2,128,1,float16,fp8,0,0.6006015777587891
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,40,2,128,1,fp8,fp8,0,0.5972015857696533
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,40,4,128,1,float16,float16,0,0.6452559947967529
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,40,4,128,1,float16,fp8,0,0.6167903900146484
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,40,4,128,1,fp8,fp8,0,0.6168831825256348
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,40,8,128,1,float16,float16,0,0.6784480094909668
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,40,8,128,1,float16,fp8,0,0.6630112171173096
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,40,8,128,1,fp8,fp8,0,0.6490032196044921
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,40,40,128,1,float16,float16,0,0.49997601509094236
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,40,40,128,1,float16,fp8,0,0.4828815937042236
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,40,40,128,1,fp8,fp8,0,0.48056640625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,40,1,128,1,float16,float16,0,0.3155375957489014
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,40,1,128,1,float16,fp8,0,0.3048815965652466
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,40,1,128,1,fp8,fp8,0,0.30303359031677246
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,40,2,128,1,float16,float16,0,0.3237152099609375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,40,2,128,1,float16,fp8,0,0.311081600189209
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,40,2,128,1,fp8,fp8,0,0.31169440746307375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,40,4,128,1,float16,float16,0,0.3313071966171265
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,40,4,128,1,float16,fp8,0,0.32028319835662844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,40,4,128,1,fp8,fp8,0,0.3211008071899414
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,40,8,128,1,float16,float16,0,0.3510704040527344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,40,8,128,1,float16,fp8,0,0.33460800647735595
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,40,8,128,1,fp8,fp8,0,0.3365231990814209
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,40,1,128,1,float16,fp8,0,5.341052627563476
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,40,1,128,1,fp8,fp8,0,5.345609664916992
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,40,1,128,1,float16,float16,0,6.03639030456543
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,40,2,128,1,float16,fp8,0,5.416497421264649
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,40,2,128,1,fp8,fp8,0,5.434735870361328
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,40,2,128,1,float16,float16,0,6.1720928192138675
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,40,4,128,1,float16,float16,0,6.376822280883789
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,40,4,128,1,float16,fp8,0,5.705068969726563
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,40,4,128,1,fp8,fp8,0,5.672465515136719
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,40,40,128,1,float16,float16,0,4.976574325561524
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,40,8,128,1,float16,fp8,0,6.095862579345703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,40,40,128,1,float16,fp8,0,4.881905746459961
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,40,8,128,1,fp8,fp8,0,6.126403045654297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,40,8,128,1,float16,float16,0,6.818500518798828
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,40,40,128,1,fp8,fp8,0,4.764305496215821
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,40,1,128,1,float16,fp8,0,2.6716335296630858
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,40,1,128,1,float16,float16,0,2.9982160568237304
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,40,1,128,1,fp8,fp8,0,2.699091148376465
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,40,2,128,1,float16,fp8,0,2.7355119705200197
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,40,2,128,1,float16,float16,0,2.94345760345459
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,40,2,128,1,fp8,fp8,0,2.737345504760742
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,40,4,128,1,float16,fp8,0,2.8434127807617187
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,40,4,128,1,float16,float16,0,3.137932777404785
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,40,4,128,1,fp8,fp8,0,2.8505647659301756
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,40,8,128,1,float16,float16,0,3.4017055511474608
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,40,8,128,1,float16,fp8,0,3.083024024963379
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,40,1,128,1,float16,float16,0,1.4816944122314453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,40,40,128,1,float16,float16,0,2.5565311431884767
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,40,40,128,1,float16,fp8,0,2.395859146118164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,40,1,128,1,float16,fp8,0,1.3509407997131349
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,40,8,128,1,fp8,fp8,0,3.0649152755737306
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,40,40,128,1,fp8,fp8,0,2.3930063247680664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,40,1,128,1,fp8,fp8,0,1.4311087608337403
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,40,2,128,1,float16,float16,0,1.4523776054382325
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,40,2,128,1,float16,fp8,0,1.4584879875183105
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,40,2,128,1,fp8,fp8,0,1.385256004333496
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,40,4,128,1,float16,float16,0,1.5016240119934081
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,40,4,128,1,float16,fp8,0,1.4345824241638183
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,40,4,128,1,fp8,fp8,0,1.4408432006835938
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,40,8,128,1,float16,float16,0,1.6091392517089844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,40,8,128,1,float16,fp8,0,1.5548720359802246
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,40,40,128,1,float16,float16,0,1.26943359375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,40,8,128,1,fp8,fp8,0,1.5896415710449219
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,40,40,128,1,float16,fp8,0,1.2180591583251954
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,40,1,128,1,float16,float16,0,0.7148255825042724
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,40,1,128,1,float16,fp8,0,0.6931087970733643
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,40,40,128,1,fp8,fp8,0,1.2725616455078126
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,40,1,128,1,fp8,fp8,0,0.698473596572876
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,40,2,128,1,float16,float16,0,0.7324687957763671
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,40,2,128,1,float16,fp8,0,0.7079055786132813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,40,2,128,1,fp8,fp8,0,0.7078400135040284
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,40,4,128,1,float16,float16,0,0.7623472213745117
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,40,4,128,1,float16,fp8,0,0.7324416160583496
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,40,4,128,1,fp8,fp8,0,0.7356304168701172
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,40,40,128,1,float16,float16,0,0.6490384101867676
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,40,8,128,1,float16,float16,0,0.8157952308654786
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,40,1,128,1,float16,float16,0,0.3684959888458252
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,40,40,128,1,fp8,fp8,0,0.6219151973724365
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,40,8,128,1,float16,fp8,0,0.7868607997894287
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,40,8,128,1,fp8,fp8,0,0.787656021118164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,40,40,128,1,float16,fp8,0,0.6218607902526856
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,40,1,128,1,float16,fp8,0,0.35947039127349856
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,40,1,128,1,fp8,fp8,0,0.35929279327392577
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,40,2,128,1,float16,float16,0,0.3779616117477417
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,40,2,128,1,float16,fp8,0,0.36424319744110106
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,40,2,128,1,fp8,fp8,0,0.3669600009918213
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,40,4,128,1,float16,float16,0,0.3913088083267212
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,40,4,128,1,float16,fp8,0,0.38052799701690676
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,40,4,128,1,fp8,fp8,0,0.38083839416503906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,40,8,128,1,float16,float16,0,0.41791839599609376
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,40,8,128,1,float16,fp8,0,0.40860800743103026
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,40,8,128,1,fp8,fp8,0,0.4066415786743164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,40,40,128,1,float16,float16,0,0.34067840576171876
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,40,40,128,1,float16,fp8,0,0.3224384069442749
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,40,40,128,1,fp8,fp8,0,0.321561598777771
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,40,1,128,1,float16,float16,0,0.1979647994041443
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,40,1,128,1,float16,fp8,0,0.19344799518585204
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,40,1,128,1,fp8,fp8,0,0.19121600389480592
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,40,2,128,1,float16,float16,0,0.20090720653533936
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,40,2,128,1,float16,fp8,0,0.19485280513763428
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,40,2,128,1,fp8,fp8,0,0.19334080219268798
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,40,4,128,1,float16,float16,0,0.20791199207305908
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,40,4,128,1,float16,fp8,0,0.2027616024017334
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,40,4,128,1,fp8,fp8,0,0.2026304006576538
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,40,8,128,1,float16,float16,0,0.22206239700317382
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,40,8,128,1,float16,fp8,0,0.21407198905944824
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,40,8,128,1,fp8,fp8,0,0.2168031930923462
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,40,1,128,1,float16,fp8,0,5.247959899902344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,40,1,128,1,fp8,fp8,0,5.231947326660157
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,40,1,128,1,float16,float16,0,5.850958251953125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,40,2,128,1,float16,fp8,0,5.3868976593017575
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,40,2,128,1,float16,float16,0,5.999932861328125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,40,2,128,1,fp8,fp8,0,5.402230453491211
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,40,4,128,1,float16,fp8,0,5.690107345581055
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,40,4,128,1,float16,float16,0,6.33758544921875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,40,1,128,1,float16,float16,0,2.885264015197754
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,40,4,128,1,fp8,fp8,0,5.695862579345703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,40,40,128,1,float16,float16,0,5.639723205566407
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,40,40,128,1,float16,fp8,0,5.413870239257813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,40,8,128,1,float16,float16,0,6.814904022216797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,40,8,128,1,float16,fp8,0,6.374335861206054
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,40,8,128,1,fp8,fp8,0,6.299473571777344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,40,40,128,1,fp8,fp8,0,5.4175567626953125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,40,1,128,1,float16,fp8,0,2.6421487808227537
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,40,1,128,1,fp8,fp8,0,2.647342491149902
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,40,2,128,1,float16,float16,0,2.769241523742676
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,40,2,128,1,float16,fp8,0,2.7440160751342773
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,40,2,128,1,fp8,fp8,0,2.7250848770141602
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,40,4,128,1,float16,fp8,0,2.858884811401367
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,40,4,128,1,float16,float16,0,3.0801263809204102
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,40,4,128,1,fp8,fp8,0,2.8638128280639648
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,40,8,128,1,float16,float16,0,3.335287857055664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,40,1,128,1,float16,float16,0,1.3768896102905273
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,40,8,128,1,float16,fp8,0,3.242967987060547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,40,40,128,1,float16,float16,0,2.829443168640137
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,40,8,128,1,fp8,fp8,0,3.1735776901245116
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,40,1,128,1,float16,fp8,0,1.339515209197998
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,40,40,128,1,float16,fp8,0,2.723379135131836
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,40,40,128,1,fp8,fp8,0,2.83165283203125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,40,1,128,1,fp8,fp8,0,1.3754176139831542
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,40,2,128,1,float16,float16,0,1.4431648254394531
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,40,2,128,1,float16,fp8,0,1.3732912063598632
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,40,2,128,1,fp8,fp8,0,1.3682944297790527
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,40,4,128,1,float16,float16,0,1.4989551544189452
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,40,4,128,1,float16,fp8,0,1.4548768043518066
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,40,4,128,1,fp8,fp8,0,1.4420080184936523
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,40,8,128,1,float16,float16,0,1.6400623321533203
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,40,8,128,1,float16,fp8,0,1.5980463981628419
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,40,40,128,1,float16,float16,0,1.4142383575439452
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,40,8,128,1,fp8,fp8,0,1.6640575408935547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,40,1,128,1,float16,float16,0,0.7016272068023681
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,40,40,128,1,fp8,fp8,0,1.3964159965515137
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,40,40,128,1,float16,fp8,0,1.3746959686279296
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,40,1,128,1,float16,fp8,0,0.6779952049255371
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,40,1,128,1,fp8,fp8,0,0.6860223770141601
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,40,2,128,1,float16,float16,0,0.7183328151702881
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,40,4,128,1,float16,float16,0,0.7540800094604492
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,40,2,128,1,float16,fp8,0,0.70174241065979
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,40,2,128,1,fp8,fp8,0,0.7065072059631348
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,40,4,128,1,float16,fp8,0,0.737663984298706
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,40,4,128,1,fp8,fp8,0,0.7330671787261963
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,40,8,128,1,float16,float16,0,0.8316415786743164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,40,8,128,1,float16,fp8,0,0.8089103698730469
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,40,40,128,1,float16,float16,0,0.7215807914733887
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,40,8,128,1,fp8,fp8,0,0.8083279609680176
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,40,40,128,1,float16,fp8,0,0.7054224014282227
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,40,1,128,1,float16,float16,0,0.36419999599456787
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,40,40,128,1,fp8,fp8,0,0.7055247783660888
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,40,1,128,1,float16,fp8,0,0.35272319316864015
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,40,1,128,1,fp8,fp8,0,0.35359361171722414
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,40,4,128,1,float16,fp8,0,0.3822767972946167
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,40,2,128,1,float16,float16,0,0.3703968048095703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,40,2,128,1,float16,fp8,0,0.35933918952941896
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,40,2,128,1,fp8,fp8,0,0.3623392105102539
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,40,4,128,1,float16,float16,0,0.38923039436340334
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,40,4,128,1,fp8,fp8,0,0.3843951940536499
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,40,8,128,1,float16,float16,0,0.4269120216369629
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,40,8,128,1,float16,fp8,0,0.4152031898498535
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,40,8,128,1,fp8,fp8,0,0.417526388168335
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,40,40,128,1,float16,float16,0,0.3738032102584839
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,40,40,128,1,float16,fp8,0,0.3612688064575195
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,40,40,128,1,fp8,fp8,0,0.3651103973388672
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,40,1,128,1,float16,float16,0,0.192302405834198
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,40,1,128,1,float16,fp8,0,0.19067360162734986
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,40,1,128,1,fp8,fp8,0,0.18837120532989501
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,40,2,128,1,float16,float16,0,0.19850560426712036
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,40,2,128,1,float16,fp8,0,0.19484479427337648
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,40,2,128,1,fp8,fp8,0,0.19427679777145385
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,40,4,128,1,float16,float16,0,0.20546879768371581
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,40,4,128,1,float16,fp8,0,0.2029792070388794
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,40,4,128,1,fp8,fp8,0,0.20232319831848145
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,40,8,128,1,float16,float16,0,0.22779519557952882
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,40,8,128,1,float16,fp8,0,0.22059199810028077
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,40,1,128,1,float16,fp8,0,0.10027840137481689
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,40,8,128,1,fp8,fp8,0,0.22164640426635743
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,40,40,128,1,float16,float16,0,0.20609440803527831
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,40,2,128,1,float16,fp8,0,0.10072480440139771
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,40,2,128,1,fp8,fp8,0,0.10092639923095703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,40,40,128,1,float16,fp8,0,0.19848480224609374
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,40,40,128,1,fp8,fp8,0,0.19789279699325563
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,40,1,128,1,float16,float16,0,0.10712480545043945
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,40,1,128,1,fp8,fp8,0,0.10127040147781372
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,40,2,128,1,float16,float16,0,0.1080064058303833
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,40,4,128,1,float16,float16,0,0.11508799791336059
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,40,4,128,1,float16,fp8,0,0.10698080062866211
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,40,4,128,1,fp8,fp8,0,0.10989600419998169
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,40,8,128,1,float16,float16,0,0.12274399995803834
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,40,8,128,1,float16,fp8,0,0.11678400039672851
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,40,8,128,1,fp8,fp8,0,0.11998560428619384
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,40,1,128,1,float16,float16,0,3.442923355102539
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,40,1,128,1,float16,fp8,0,3.2592464447021485
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,40,1,128,1,fp8,fp8,0,3.240143966674805
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,40,2,128,1,float16,float16,0,3.4651519775390627
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,40,2,128,1,float16,fp8,0,3.362204742431641
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,40,2,128,1,fp8,fp8,0,3.3493392944335936
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,40,4,128,1,float16,float16,0,3.7362960815429687
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,40,4,128,1,float16,fp8,0,3.5905391693115236
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,40,4,128,1,fp8,fp8,0,3.708588790893555
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,40,8,128,1,float16,float16,0,4.226027297973633
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,40,8,128,1,float16,fp8,0,4.024494552612305
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,40,40,128,1,float16,float16,0,3.8168304443359373
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,40,8,128,1,fp8,fp8,0,4.077347183227539
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,40,40,128,1,float16,fp8,0,3.737635040283203
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,40,40,128,1,fp8,fp8,0,3.7384384155273436
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,40,1,128,1,float16,float16,0,1.6957279205322267
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,40,1,128,1,float16,fp8,0,1.6767791748046874
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,40,1,128,1,fp8,fp8,0,1.6336624145507812
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,40,2,128,1,float16,float16,0,1.7472463607788087
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,40,2,128,1,float16,fp8,0,1.7042560577392578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,40,2,128,1,fp8,fp8,0,1.6877967834472656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,40,4,128,1,float16,float16,0,1.8552688598632812
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,40,4,128,1,float16,fp8,0,1.828206443786621
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,40,4,128,1,fp8,fp8,0,1.818943977355957
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,40,8,128,1,float16,float16,0,2.067255973815918
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,40,8,128,1,float16,fp8,0,2.0259647369384766
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,40,1,128,1,float16,float16,0,0.8514927864074707
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,40,40,128,1,float16,fp8,0,1.883795166015625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,40,1,128,1,float16,fp8,0,0.8395824432373047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,40,8,128,1,fp8,fp8,0,2.0714031219482423
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,40,2,128,1,float16,float16,0,0.8765135765075683
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,40,40,128,1,float16,float16,0,1.922724723815918
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,40,40,128,1,fp8,fp8,0,1.9090784072875977
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,40,1,128,1,fp8,fp8,0,0.8785535812377929
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,40,2,128,1,float16,fp8,0,0.8639632225036621
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,40,2,128,1,fp8,fp8,0,0.8574607849121094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,40,4,128,1,float16,float16,0,0.9344127655029297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,40,4,128,1,float16,fp8,0,0.9168383598327636
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,40,4,128,1,fp8,fp8,0,0.9112303733825684
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,40,8,128,1,float16,float16,0,1.0409119606018067
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,40,8,128,1,float16,fp8,0,1.0302783966064453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,40,8,128,1,fp8,fp8,0,1.0367232322692872
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,40,40,128,1,fp8,fp8,0,0.9698543548583984
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,40,40,128,1,float16,float16,0,0.9756848335266113
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,40,40,128,1,float16,fp8,0,0.9557248115539551
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,40,1,128,1,float16,float16,0,0.4382031917572021
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,40,1,128,1,float16,fp8,0,0.4265920162200928
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,40,1,128,1,fp8,fp8,0,0.4270143985748291
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,40,2,128,1,float16,float16,0,0.4508927822113037
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,40,2,128,1,float16,fp8,0,0.44490718841552734
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,40,2,128,1,fp8,fp8,0,0.44620962142944337
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,40,4,128,1,float16,float16,0,0.47951040267944334
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,40,4,128,1,float16,fp8,0,0.46805758476257325
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,40,4,128,1,fp8,fp8,0,0.4736112117767334
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,40,8,128,1,float16,float16,0,0.5338799953460693
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,40,8,128,1,float16,fp8,0,0.525377607345581
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,40,8,128,1,fp8,fp8,0,0.5285583972930908
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,40,40,128,1,float16,float16,0,0.502723217010498
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,40,40,128,1,float16,fp8,0,0.49788479804992675
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,40,40,128,1,fp8,fp8,0,0.49805278778076173
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,40,1,128,1,float16,float16,0,0.22772319316864015
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,40,1,128,1,float16,fp8,0,0.2269439935684204
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,40,1,128,1,fp8,fp8,0,0.22583360671997071
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,40,2,128,1,float16,float16,0,0.2364192008972168
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,40,2,128,1,float16,fp8,0,0.23521120548248292
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,40,2,128,1,fp8,fp8,0,0.2327039957046509
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,40,4,128,1,float16,float16,0,0.2514336109161377
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,40,4,128,1,float16,fp8,0,0.24765279293060302
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,40,4,128,1,fp8,fp8,0,0.24733760356903076
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,40,8,128,1,float16,float16,0,0.27992639541625974
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,40,8,128,1,float16,fp8,0,0.2727855920791626
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,40,8,128,1,fp8,fp8,0,0.2748559951782227
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,40,40,128,1,float16,float16,0,0.2654047966003418
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,40,40,128,1,float16,fp8,0,0.2623215913772583
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,40,40,128,1,fp8,fp8,0,0.263102388381958
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,40,1,128,1,float16,float16,0,0.1265663981437683
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,40,1,128,1,float16,fp8,0,0.12428799867630005
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,40,1,128,1,fp8,fp8,0,0.1257680058479309
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,40,2,128,1,float16,float16,0,0.12754080295562745
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,40,2,128,1,float16,fp8,0,0.1272495985031128
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,40,2,128,1,fp8,fp8,0,0.12678079605102538
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,40,4,128,1,float16,float16,0,0.13721120357513428
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,40,4,128,1,float16,fp8,0,0.13608959913253785
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,40,4,128,1,fp8,fp8,0,0.13484159708023072
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,40,8,128,1,float16,float16,0,0.14944640398025513
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,40,8,128,1,float16,fp8,0,0.1488111972808838
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,40,8,128,1,fp8,fp8,0,0.14832960367202758
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,40,40,128,1,float16,float16,0,0.14683040380477905
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,40,40,128,1,float16,fp8,0,0.14469599723815918
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,40,40,128,1,fp8,fp8,0,0.14527679681777955
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,40,1,128,1,float16,float16,0,0.07042239904403687
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,40,1,128,1,float16,fp8,0,0.06885120272636414
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,40,1,128,1,fp8,fp8,0,0.06886079907417297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,40,2,128,1,float16,float16,0,0.07212960124015808
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,40,2,128,1,float16,fp8,0,0.06946240067481994
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,40,2,128,1,fp8,fp8,0,0.06959360241889953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,40,4,128,1,float16,float16,0,0.07487360239028931
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,40,4,128,1,float16,fp8,0,0.07243840098381042
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,40,4,128,1,fp8,fp8,0,0.07298079729080201
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,40,8,128,1,float16,float16,0,0.08527039885520935
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,40,8,128,1,float16,fp8,0,0.07978559732437134
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,40,8,128,1,fp8,fp8,0,0.07940959930419922
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,40,1,128,1,float16,float16,0,3.4629119873046874
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,40,1,128,1,float16,fp8,0,3.422150421142578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,40,1,128,1,fp8,fp8,0,3.414231872558594
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,40,2,128,1,float16,float16,0,3.671857452392578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,40,2,128,1,float16,fp8,0,3.564604949951172
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,40,2,128,1,fp8,fp8,0,3.5799137115478517
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,40,4,128,1,float16,float16,0,3.902094268798828
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,40,4,128,1,float16,fp8,0,3.890193557739258
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,40,4,128,1,fp8,fp8,0,3.9047824859619142
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,40,8,128,1,float16,float16,0,4.509502410888672
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,40,8,128,1,float16,fp8,0,4.453692626953125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,40,8,128,1,fp8,fp8,0,4.51297607421875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,40,40,128,1,float16,float16,0,4.600684738159179
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,40,1,128,1,float16,float16,0,1.7338752746582031
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,40,40,128,1,float16,fp8,0,4.572169494628906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,40,40,128,1,fp8,fp8,0,4.539120101928711
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,40,1,128,1,float16,fp8,0,1.7381664276123048
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,40,1,128,1,fp8,fp8,0,1.7223167419433594
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,40,2,128,1,float16,float16,0,1.8095487594604491
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,40,2,128,1,float16,fp8,0,1.7962047576904296
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,40,2,128,1,fp8,fp8,0,1.8267072677612304
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,40,4,128,1,float16,float16,0,1.9462976455688477
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,40,4,128,1,float16,fp8,0,1.955526351928711
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,40,4,128,1,fp8,fp8,0,1.9499856948852539
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,40,8,128,1,float16,float16,0,2.2592063903808595
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,40,8,128,1,float16,fp8,0,2.2420095443725585
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,40,1,128,1,float16,float16,0,0.8777119636535644
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,40,8,128,1,fp8,fp8,0,2.2551631927490234
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,40,1,128,1,float16,fp8,0,0.8780863761901856
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,40,40,128,1,float16,float16,0,2.318604850769043
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,40,1,128,1,fp8,fp8,0,0.8908672332763672
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,40,40,128,1,float16,fp8,0,2.294798469543457
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,40,2,128,1,float16,float16,0,0.9117856025695801
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,40,40,128,1,fp8,fp8,0,2.3136480331420897
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,40,2,128,1,float16,fp8,0,0.9154159545898437
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,40,2,128,1,fp8,fp8,0,0.9170576095581054
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,40,4,128,1,float16,float16,0,0.9903792381286621
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,40,4,128,1,float16,fp8,0,0.9866656303405762
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,40,4,128,1,fp8,fp8,0,1.000059223175049
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,40,8,128,1,float16,float16,0,1.1431952476501466
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,40,8,128,1,float16,fp8,0,1.1330032348632812
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,40,1,128,1,float16,float16,0,0.44870882034301757
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,40,8,128,1,fp8,fp8,0,1.140822410583496
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,40,40,128,1,float16,float16,0,1.1750975608825684
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,40,40,128,1,float16,fp8,0,1.1599007606506349
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,40,40,128,1,fp8,fp8,0,1.1543168067932128
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,40,1,128,1,float16,fp8,0,0.4537600040435791
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,40,2,128,1,float16,fp8,0,0.469652795791626
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,40,4,128,1,fp8,fp8,0,0.5063536167144775
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,40,1,128,1,fp8,fp8,0,0.4522704124450684
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,40,2,128,1,float16,float16,0,0.469542407989502
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,40,2,128,1,fp8,fp8,0,0.471943998336792
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,40,4,128,1,float16,fp8,0,0.5058095932006836
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,40,4,128,1,float16,float16,0,0.5030960083007813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,40,8,128,1,fp8,fp8,0,0.5781792163848877
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,40,1,128,1,float16,fp8,0,0.2383552074432373
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,40,1,128,1,float16,float16,0,0.23998401165008545
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,40,40,128,1,float16,float16,0,0.6012671947479248
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,40,8,128,1,float16,fp8,0,0.5788991928100586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,40,8,128,1,float16,float16,0,0.5834671974182128
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,40,1,128,1,fp8,fp8,0,0.23953759670257568
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,40,40,128,1,fp8,fp8,0,0.594326400756836
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,40,40,128,1,float16,fp8,0,0.5904064178466797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,40,2,128,1,float16,float16,0,0.24575839042663575
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,40,2,128,1,float16,fp8,0,0.24727680683135986
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,40,2,128,1,fp8,fp8,0,0.2464128017425537
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,40,4,128,1,float16,float16,0,0.26701281070709226
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,40,4,128,1,float16,fp8,0,0.26816320419311523
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,40,4,128,1,fp8,fp8,0,0.2657599925994873
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,40,8,128,1,float16,float16,0,0.3035936117172241
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,40,8,128,1,float16,fp8,0,0.30149919986724855
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,40,1,128,1,float16,fp8,0,0.13099360466003418
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,40,8,128,1,fp8,fp8,0,0.3006223917007446
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,40,40,128,1,float16,float16,0,0.31640961170196535
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,40,40,128,1,float16,fp8,0,0.3083967924118042
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,40,40,128,1,fp8,fp8,0,0.30914719104766847
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,40,1,128,1,float16,float16,0,0.13089120388031006
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,40,1,128,1,fp8,fp8,0,0.1319808006286621
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,40,2,128,1,float16,float16,0,0.13692959547042846
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,40,2,128,1,float16,fp8,0,0.13784639835357665
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,40,2,128,1,fp8,fp8,0,0.13677120208740234
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,40,4,128,1,float16,float16,0,0.14507679939270018
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,40,4,128,1,float16,fp8,0,0.14516960382461547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,40,4,128,1,fp8,fp8,0,0.1467967987060547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,40,8,128,1,float16,float16,0,0.16412320137023925
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,40,8,128,1,float16,fp8,0,0.1642032027244568
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,40,8,128,1,fp8,fp8,0,0.1634335994720459
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,40,40,128,1,float16,float16,0,0.17154879570007325
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,40,40,128,1,float16,fp8,0,0.16914399862289428
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,40,40,128,1,fp8,fp8,0,0.16934239864349365
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,40,1,128,1,float16,float16,0,0.07538719773292542
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,40,1,128,1,float16,fp8,0,0.07332159876823426
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,40,1,128,1,fp8,fp8,0,0.07186239957809448
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,40,2,128,1,float16,float16,0,0.07798720002174378
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,40,2,128,1,float16,fp8,0,0.07400799989700317
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,40,2,128,1,fp8,fp8,0,0.07426720261573791
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,40,4,128,1,float16,float16,0,0.08373280167579651
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,40,4,128,1,float16,fp8,0,0.08008800148963928
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,40,4,128,1,fp8,fp8,0,0.07934880256652832
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,40,8,128,1,float16,float16,0,0.09352319836616516
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,40,8,128,1,float16,fp8,0,0.08990079760551453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,40,8,128,1,fp8,fp8,0,0.08991199731826782
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,40,40,128,1,float16,float16,0,0.09972959756851196
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,40,40,128,1,float16,fp8,0,0.09139999747276306
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,40,40,128,1,fp8,fp8,0,0.09239199757575989
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,40,1,128,1,float16,float16,0,0.04677439928054809
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,40,1,128,1,float16,fp8,0,0.04633919894695282
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,40,1,128,1,fp8,fp8,0,0.04683839976787567
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,40,2,128,1,float16,float16,0,0.04717119932174683
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,40,2,128,1,float16,fp8,0,0.047198399901390076
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,40,2,128,1,fp8,fp8,0,0.04726560115814209
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,40,4,128,1,float16,float16,0,0.04794879853725433
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,40,4,128,1,float16,fp8,0,0.04819520115852356
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,40,4,128,1,fp8,fp8,0,0.048126399517059326
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,40,8,128,1,float16,float16,0,0.05382080078125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,40,8,128,1,float16,fp8,0,0.05293599963188171
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,40,8,128,1,fp8,fp8,0,0.05273600220680237
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,40,1,128,1,float16,float16,0,2.443067169189453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,40,1,128,1,float16,fp8,0,2.495686340332031
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,40,1,128,1,fp8,fp8,0,2.5042863845825196
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,40,2,128,1,float16,float16,0,2.5910783767700196
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,40,2,128,1,float16,fp8,0,2.6603424072265627
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,40,2,128,1,fp8,fp8,0,2.649995231628418
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,40,4,128,1,float16,float16,0,2.897603225708008
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,40,4,128,1,float16,fp8,0,2.9663503646850584
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,40,4,128,1,fp8,fp8,0,2.9493024826049803
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,40,8,128,1,float16,float16,0,3.5049312591552733
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,40,8,128,1,float16,fp8,0,3.5932865142822266
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,40,8,128,1,fp8,fp8,0,3.5758945465087892
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,40,1,128,1,float16,float16,0,1.238260841369629
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,40,40,128,1,float16,float16,0,4.092950439453125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,40,1,128,1,fp8,fp8,0,1.2611071586608886
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,40,1,128,1,float16,fp8,0,1.262390422821045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,40,40,128,1,float16,fp8,0,4.090224075317383
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,40,2,128,1,float16,float16,0,1.325937557220459
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,40,2,128,1,float16,fp8,0,1.332140827178955
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,40,2,128,1,fp8,fp8,0,1.3325360298156739
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,40,4,128,1,float16,float16,0,1.468385601043701
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,40,40,128,1,fp8,fp8,0,4.1235710144042965
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,40,4,128,1,float16,fp8,0,1.482731246948242
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,40,4,128,1,fp8,fp8,0,1.4844927787780762
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,40,8,128,1,float16,float16,0,1.760651206970215
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,40,1,128,1,float16,float16,0,0.630017614364624
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,40,8,128,1,float16,fp8,0,1.8120512008666991
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,40,1,128,1,float16,fp8,0,0.6453519821166992
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,40,8,128,1,fp8,fp8,0,1.7864799499511719
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,40,40,128,1,float16,float16,0,2.0615583419799806
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,40,1,128,1,fp8,fp8,0,0.6408703804016114
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,40,40,128,1,float16,fp8,0,2.066192054748535
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,40,40,128,1,fp8,fp8,0,2.077755165100098
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,40,2,128,1,float16,float16,0,0.6656864166259766
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,40,2,128,1,float16,fp8,0,0.6890912055969238
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,40,2,128,1,fp8,fp8,0,0.6811840057373046
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,40,4,128,1,float16,float16,0,0.7436304092407227
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,40,4,128,1,float16,fp8,0,0.7548719882965088
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,40,4,128,1,fp8,fp8,0,0.7491968154907227
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,40,8,128,1,float16,float16,0,0.8968591690063477
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,40,8,128,1,float16,fp8,0,0.9049391746520996
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,40,40,128,1,float16,float16,0,1.044318389892578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,40,8,128,1,fp8,fp8,0,0.9045248031616211
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,40,1,128,1,float16,float16,0,0.32837600708007814
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,40,1,128,1,float16,fp8,0,0.3325952053070068
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,40,40,128,1,float16,fp8,0,1.0458111763000488
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,40,1,128,1,fp8,fp8,0,0.33527040481567383
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,40,40,128,1,fp8,fp8,0,1.0451264381408691
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,40,2,128,1,float16,float16,0,0.34711360931396484
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,40,2,128,1,float16,fp8,0,0.35280001163482666
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,40,2,128,1,fp8,fp8,0,0.3525791883468628
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,40,4,128,1,float16,float16,0,0.384335994720459
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,40,4,128,1,float16,fp8,0,0.3872064113616943
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,40,4,128,1,fp8,fp8,0,0.3889039993286133
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,40,8,128,1,float16,float16,0,0.46203198432922366
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,40,8,128,1,float16,fp8,0,0.4646575927734375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,40,8,128,1,fp8,fp8,0,0.4640960216522217
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,40,40,128,1,float16,float16,0,0.5353295803070068
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,40,40,128,1,float16,fp8,0,0.5350272178649902
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,40,1,128,1,float16,float16,0,0.1774832010269165
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,40,40,128,1,fp8,fp8,0,0.5362847805023193
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,40,1,128,1,float16,fp8,0,0.18044639825820924
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,40,1,128,1,fp8,fp8,0,0.18122559785842896
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,40,2,128,1,float16,float16,0,0.1858944058418274
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,40,8,128,1,float16,float16,0,0.24244320392608643
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,40,2,128,1,float16,fp8,0,0.1882048010826111
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,40,2,128,1,fp8,fp8,0,0.18793920278549195
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,40,4,128,1,float16,float16,0,0.2037600040435791
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,40,4,128,1,float16,fp8,0,0.20813920497894287
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,40,4,128,1,fp8,fp8,0,0.2076256036758423
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,40,8,128,1,float16,fp8,0,0.24425280094146729
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,40,8,128,1,fp8,fp8,0,0.24507999420166016
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,40,40,128,1,float16,float16,0,0.28194398880004884
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,40,40,128,1,float16,fp8,0,0.28072960376739503
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,40,40,128,1,fp8,fp8,0,0.2805216073989868
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,40,1,128,1,float16,float16,0,0.09982560276985168
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,40,1,128,1,float16,fp8,0,0.10208319425582886
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,40,1,128,1,fp8,fp8,0,0.10138239860534667
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,40,2,128,1,float16,float16,0,0.10563679933547973
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,40,2,128,1,float16,fp8,0,0.1086959958076477
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,40,2,128,1,fp8,fp8,0,0.1076367974281311
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,40,4,128,1,float16,float16,0,0.11384960412979125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,40,4,128,1,float16,fp8,0,0.11556639671325683
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,40,4,128,1,fp8,fp8,0,0.11540160179138184
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,40,8,128,1,float16,float16,0,0.13381439447402954
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,40,8,128,1,float16,fp8,0,0.13426079750061035
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,40,8,128,1,fp8,fp8,0,0.1352031946182251
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,40,40,128,1,float16,float16,0,0.1540735960006714
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,40,40,128,1,float16,fp8,0,0.15351840257644653
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,40,40,128,1,fp8,fp8,0,0.15314879417419433
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,40,1,128,1,float16,float16,0,0.059961599111557004
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,40,1,128,1,float16,fp8,0,0.05702400207519531
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,40,1,128,1,fp8,fp8,0,0.05767520070075989
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,40,2,128,1,float16,float16,0,0.06151999831199646
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,40,2,128,1,float16,fp8,0,0.058740800619125365
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,40,2,128,1,fp8,fp8,0,0.0594864010810852
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,40,4,128,1,float16,float16,0,0.06905760169029236
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,40,4,128,1,float16,fp8,0,0.06461279988288879
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,40,4,128,1,fp8,fp8,0,0.06537759900093079
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,40,8,128,1,float16,float16,0,0.07811840176582337
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,40,8,128,1,float16,fp8,0,0.07414240241050721
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,40,8,128,1,fp8,fp8,0,0.07539520263671876
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,40,40,128,1,float16,float16,0,0.08884959816932678
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,40,40,128,1,float16,fp8,0,0.08218880295753479
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,40,40,128,1,fp8,fp8,0,0.08288000226020813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,40,1,128,1,float16,float16,0,0.038217601180076596
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,40,1,128,1,float16,fp8,0,0.039603200554847715
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,40,1,128,1,fp8,fp8,0,0.03934400081634522
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,40,2,128,1,float16,float16,0,0.03912320137023926
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,40,2,128,1,float16,fp8,0,0.03947199881076813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,40,2,128,1,fp8,fp8,0,0.039473599195480345
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,40,4,128,1,float16,float16,0,0.03969280123710632
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,40,4,128,1,float16,fp8,0,0.04036639928817749
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,40,4,128,1,fp8,fp8,0,0.040670400857925414
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,40,8,128,1,float16,float16,0,0.0445360004901886
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,40,8,128,1,float16,fp8,0,0.04549759924411774
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,40,8,128,1,fp8,fp8,0,0.045212799310684205
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,40,40,128,1,float16,float16,0,0.050892800092697144
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,40,40,128,1,float16,fp8,0,0.050532799959182736
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,40,40,128,1,fp8,fp8,0,0.050543999671936034
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,40,1,128,1,float16,float16,0,0.028937599062919615
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,40,1,128,1,float16,fp8,0,0.030060800909996032
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,40,1,128,1,fp8,fp8,0,0.030348798632621764
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,40,2,128,1,float16,float16,0,0.029049599170684816
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,40,2,128,1,float16,fp8,0,0.030244800448417663
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,40,2,128,1,fp8,fp8,0,0.029980799555778502
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,40,4,128,1,float16,float16,0,0.029283198714256286
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,40,4,128,1,float16,fp8,0,0.030692800879478455
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,40,4,128,1,fp8,fp8,0,0.030454400181770324
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,40,8,128,1,float16,float16,0,0.030239999294281006
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,40,8,128,1,float16,fp8,0,0.03166080117225647
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,40,8,128,1,fp8,fp8,0,0.031841599941253663
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,40,1,128,1,float16,float16,0,0.9785327911376953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,40,1,128,1,float16,fp8,0,1.0308095932006835
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,40,1,128,1,fp8,fp8,0,1.0312383651733399
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,40,2,128,1,float16,float16,0,1.0552720069885253
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,40,2,128,1,float16,fp8,0,1.1028512001037598
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,40,2,128,1,fp8,fp8,0,1.1094960212707519
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,40,4,128,1,float16,float16,0,1.204035186767578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,40,4,128,1,float16,fp8,0,1.2584959983825683
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,40,4,128,1,fp8,fp8,0,1.2551919937133789
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,40,8,128,1,float16,float16,0,1.4991600036621093
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,40,1,128,1,float16,float16,0,0.5043024063110352
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,40,8,128,1,float16,fp8,0,1.542422389984131
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,40,1,128,1,float16,fp8,0,0.528601598739624
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,40,8,128,1,fp8,fp8,0,1.541649627685547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,40,1,128,1,fp8,fp8,0,0.528492784500122
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,40,40,128,1,float16,float16,0,1.9459503173828125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,40,2,128,1,float16,float16,0,0.539631986618042
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,40,40,128,1,float16,fp8,0,1.9227664947509766
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,40,40,128,1,fp8,fp8,0,1.921976089477539
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,40,2,128,1,float16,fp8,0,0.5638031959533691
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,40,2,128,1,fp8,fp8,0,0.5637184143066406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,40,4,128,1,float16,float16,0,0.6163919925689697
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,40,4,128,1,float16,fp8,0,0.6383440017700195
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,40,4,128,1,fp8,fp8,0,0.6385024070739747
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,40,8,128,1,float16,float16,0,0.7632192134857178
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,40,8,128,1,float16,fp8,0,0.7854576110839844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,40,8,128,1,fp8,fp8,0,0.7831151962280274
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,40,40,128,1,float16,float16,0,0.9854800224304199
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,40,1,128,1,float16,float16,0,0.2637808084487915
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,40,40,128,1,float16,fp8,0,0.9721920013427734
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,40,1,128,1,float16,fp8,0,0.27670879364013673
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,40,40,128,1,fp8,fp8,0,0.97542724609375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,40,1,128,1,fp8,fp8,0,0.2766416072845459
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,40,2,128,1,float16,float16,0,0.283187198638916
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,40,2,128,1,float16,fp8,0,0.2957456111907959
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,40,2,128,1,fp8,fp8,0,0.2946943998336792
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,40,4,128,1,float16,float16,0,0.3190704107284546
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,40,4,128,1,float16,fp8,0,0.3322191953659058
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,40,4,128,1,fp8,fp8,0,0.33244800567626953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,40,8,128,1,float16,float16,0,0.39410560131072997
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,40,40,128,1,float16,fp8,0,0.4983215808868408
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,40,8,128,1,float16,fp8,0,0.40302238464355467
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,40,8,128,1,fp8,fp8,0,0.4047056198120117
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,40,40,128,1,float16,float16,0,0.507366418838501
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,40,1,128,1,float16,float16,0,0.14441280364990233
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,40,40,128,1,fp8,fp8,0,0.49816322326660156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,40,1,128,1,float16,fp8,0,0.15194560289382936
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,40,1,128,1,fp8,fp8,0,0.15177600383758544
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,40,2,128,1,float16,float16,0,0.1524127960205078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,40,2,128,1,float16,fp8,0,0.1594928026199341
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,40,2,128,1,fp8,fp8,0,0.16043039560317993
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,40,4,128,1,float16,float16,0,0.17189760208129884
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,40,4,128,1,float16,fp8,0,0.17797759771347046
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,40,4,128,1,fp8,fp8,0,0.17795679569244385
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,40,8,128,1,float16,float16,0,0.20822880268096924
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,40,8,128,1,float16,fp8,0,0.21317439079284667
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,40,8,128,1,fp8,fp8,0,0.21283841133117676
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,40,40,128,1,float16,float16,0,0.2648224115371704
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,40,40,128,1,float16,fp8,0,0.26134400367736815
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,40,40,128,1,fp8,fp8,0,0.2605776071548462
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,40,1,128,1,float16,float16,0,0.08297439813613891
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,40,1,128,1,float16,fp8,0,0.08608959913253784
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,40,1,128,1,fp8,fp8,0,0.08640159964561463
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,40,2,128,1,float16,float16,0,0.08847200274467468
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,40,2,128,1,float16,fp8,0,0.09241920113563537
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,40,2,128,1,fp8,fp8,0,0.09300640225410461
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,40,4,128,1,float16,float16,0,0.09735680222511292
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,40,4,128,1,float16,fp8,0,0.10070879459381103
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,40,4,128,1,fp8,fp8,0,0.10065599679946899
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,40,8,128,1,float16,float16,0,0.11614559888839722
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,40,8,128,1,float16,fp8,0,0.1183087944984436
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,40,8,128,1,fp8,fp8,0,0.11856319904327392
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,40,40,128,1,float16,float16,0,0.1446336030960083
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,40,40,128,1,float16,fp8,0,0.14282239675521852
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,40,40,128,1,fp8,fp8,0,0.14246400594711303
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,40,1,128,1,float16,float16,0,0.05055040121078491
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,40,1,128,1,float16,fp8,0,0.050963199138641356
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,40,1,128,1,fp8,fp8,0,0.050279998779296876
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,40,2,128,1,float16,float16,0,0.053547197580337526
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,40,2,128,1,float16,fp8,0,0.05252159833908081
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,40,2,128,1,fp8,fp8,0,0.05163679718971252
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,40,4,128,1,float16,float16,0,0.060115200281143186
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,40,4,128,1,float16,fp8,0,0.05771840214729309
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,40,4,128,1,fp8,fp8,0,0.05788319706916809
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,40,40,128,1,float16,fp8,0,0.0762336015701294
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,40,8,128,1,float16,float16,0,0.06857439875602722
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,40,8,128,1,float16,fp8,0,0.06704000234603882
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,40,8,128,1,fp8,fp8,0,0.06738880276679993
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,40,40,128,1,float16,float16,0,0.08359040021896362
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,40,40,128,1,fp8,fp8,0,0.07758079767227173
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,40,1,128,1,float16,float16,0,0.03367840051651001
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,40,1,128,1,float16,fp8,0,0.03580160140991211
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,40,1,128,1,fp8,fp8,0,0.03549120128154755
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,40,2,128,1,float16,float16,0,0.03376320004463196
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,40,2,128,1,float16,fp8,0,0.03579199910163879
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,40,8,128,1,float16,fp8,0,0.041388800740242
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,40,2,128,1,fp8,fp8,0,0.03596639931201935
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,40,4,128,1,float16,float16,0,0.03471840023994446
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,40,4,128,1,float16,fp8,0,0.03682560026645661
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,40,4,128,1,fp8,fp8,0,0.03684000074863434
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,40,8,128,1,float16,float16,0,0.03986560106277466
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,40,8,128,1,fp8,fp8,0,0.04118080139160156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,40,40,128,1,float16,float16,0,0.047060799598693845
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,40,40,128,1,float16,fp8,0,0.04717440009117126
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,40,40,128,1,fp8,fp8,0,0.04732640087604523
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,40,1,128,1,float16,float16,0,0.02699359953403473
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,40,1,128,1,float16,fp8,0,0.028387200832366944
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,40,1,128,1,fp8,fp8,0,0.02831520140171051
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,40,2,128,1,float16,float16,0,0.027020800113677978
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,40,2,128,1,float16,fp8,0,0.02831839919090271
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,40,2,128,1,fp8,fp8,0,0.02871679961681366
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,40,4,128,1,float16,float16,0,0.027371200919151305
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,40,4,128,1,float16,fp8,0,0.028782400488853454
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,40,4,128,1,fp8,fp8,0,0.028887999057769776
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,40,8,128,1,float16,float16,0,0.028454399108886717
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,40,8,128,1,float16,fp8,0,0.029662400484085083
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,40,8,128,1,fp8,fp8,0,0.0295199990272522
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,40,40,128,1,float16,float16,0,0.03033440113067627
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,40,40,128,1,float16,fp8,0,0.03128480017185211
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,40,40,128,1,fp8,fp8,0,0.03115679919719696
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,40,1,128,1,float16,float16,0,0.023073600232601167
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,40,1,128,1,float16,fp8,0,0.023984000086784363
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,40,1,128,1,fp8,fp8,0,0.02401600033044815
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,40,2,128,1,float16,float16,0,0.02322240024805069
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,40,2,128,1,float16,fp8,0,0.02398560047149658
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,40,2,128,1,fp8,fp8,0,0.024083200097084045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,40,4,128,1,float16,float16,0,0.02359360009431839
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,40,4,128,1,float16,fp8,0,0.024214400351047514
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,40,4,128,1,fp8,fp8,0,0.02436159998178482
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,40,8,128,1,float16,float16,0,0.023824000358581544
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,40,8,128,1,float16,fp8,0,0.024510399997234346
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,40,8,128,1,fp8,fp8,0,0.024235199391841888
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,40,1,128,1,float16,float16,0,0.47020797729492186
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,40,1,128,1,float16,fp8,0,0.508673620223999
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,40,1,128,1,fp8,fp8,0,0.5098639965057373
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,40,2,128,1,float16,float16,0,0.5064608097076416
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,40,2,128,1,float16,fp8,0,0.5491983890533447
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,40,2,128,1,fp8,fp8,0,0.5426000118255615
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,40,4,128,1,float16,float16,0,0.5833295822143555
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,40,4,128,1,float16,fp8,0,0.6189104080200195
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,40,4,128,1,fp8,fp8,0,0.6189040184020996
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,40,8,128,1,float16,float16,0,0.7232304096221924
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,40,8,128,1,float16,fp8,0,0.7669616222381592
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,40,8,128,1,fp8,fp8,0,0.7688799858093261
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,40,40,128,1,float16,float16,0,0.9657152175903321
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,40,1,128,1,float16,float16,0,0.2444927930831909
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,40,1,128,1,float16,fp8,0,0.26494879722595216
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,40,40,128,1,float16,fp8,0,0.9681632041931152
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,40,40,128,1,fp8,fp8,0,0.9685168266296387
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,40,1,128,1,fp8,fp8,0,0.2646512031555176
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,40,2,128,1,float16,float16,0,0.2644959926605225
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,40,2,128,1,float16,fp8,0,0.2846895933151245
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,40,2,128,1,fp8,fp8,0,0.2862272024154663
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,40,4,128,1,float16,float16,0,0.3002415895462036
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,40,4,128,1,float16,fp8,0,0.31944000720977783
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,40,4,128,1,fp8,fp8,0,0.31939520835876467
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,40,8,128,1,float16,float16,0,0.37351040840148925
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,40,8,128,1,float16,fp8,0,0.3923183917999268
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,40,8,128,1,fp8,fp8,0,0.39455039501190187
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,40,40,128,1,float16,float16,0,0.4952752113342285
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,40,40,128,1,float16,fp8,0,0.4964896202087402
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,40,1,128,1,float16,float16,0,0.1373744010925293
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,40,40,128,1,fp8,fp8,0,0.49588961601257325
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,40,1,128,1,float16,fp8,0,0.1476415991783142
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,40,1,128,1,fp8,fp8,0,0.14611999988555907
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,40,2,128,1,float16,float16,0,0.144814395904541
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,40,2,128,1,float16,fp8,0,0.15554879903793334
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,40,2,128,1,fp8,fp8,0,0.15534080266952516
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,40,4,128,1,float16,float16,0,0.16248480081558228
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,40,4,128,1,float16,fp8,0,0.17335360050201415
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,40,4,128,1,fp8,fp8,0,0.17373280525207518
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,40,8,128,1,float16,float16,0,0.19899200201034545
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,40,8,128,1,float16,fp8,0,0.20921919345855713
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,40,8,128,1,fp8,fp8,0,0.2095952033996582
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,40,40,128,1,float16,float16,0,0.2604207992553711
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,40,40,128,1,float16,fp8,0,0.2576368093490601
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,40,40,128,1,fp8,fp8,0,0.25682079792022705
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,40,1,128,1,float16,float16,0,0.07982079982757569
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,40,1,128,1,float16,fp8,0,0.08284800052642823
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,40,1,128,1,fp8,fp8,0,0.08307200074195861
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,40,2,128,1,float16,float16,0,0.08564320206642151
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,40,2,128,1,float16,fp8,0,0.08890560269355774
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,40,2,128,1,fp8,fp8,0,0.08865119814872742
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,40,4,128,1,float16,float16,0,0.09255520105361939
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,40,4,128,1,float16,fp8,0,0.09677919745445251
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,40,4,128,1,fp8,fp8,0,0.09589920043945313
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,40,8,128,1,float16,float16,0,0.11189600229263305
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,40,8,128,1,float16,fp8,0,0.11526399850845337
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,40,8,128,1,fp8,fp8,0,0.11526559591293335
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,40,40,128,1,float16,float16,0,0.14218720197677612
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,40,40,128,1,float16,fp8,0,0.13666239976882935
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,40,40,128,1,fp8,fp8,0,0.13735519647598265
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,40,1,128,1,float16,float16,0,0.046982398629188536
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,40,1,128,1,float16,fp8,0,0.04585599899291992
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,40,1,128,1,fp8,fp8,0,0.04599680006504059
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,40,2,128,1,float16,float16,0,0.04905920028686524
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,40,2,128,1,float16,fp8,0,0.04735519886016846
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,40,2,128,1,fp8,fp8,0,0.04746879935264588
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,40,4,128,1,float16,float16,0,0.05632320046424866
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,40,4,128,1,fp8,fp8,0,0.05227360129356384
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,40,4,128,1,float16,fp8,0,0.05254080295562744
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,40,8,128,1,float16,float16,0,0.06567999720573425
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,40,8,128,1,float16,fp8,0,0.06253600120544434
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,40,8,128,1,fp8,fp8,0,0.062375998497009276
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,40,40,128,1,float16,float16,0,0.08206719756126404
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,40,40,128,1,float16,fp8,0,0.07169600129127503
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,40,40,128,1,fp8,fp8,0,0.07249280214309692
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,40,1,128,1,float16,float16,0,0.0320576012134552
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,40,1,128,1,float16,fp8,0,0.03399679958820343
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,40,1,128,1,fp8,fp8,0,0.03412159979343414
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,40,2,128,1,float16,float16,0,0.03248479962348938
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,40,2,128,1,float16,fp8,0,0.03406879901885986
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,40,8,128,1,float16,fp8,0,0.039534398913383485
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,40,2,128,1,fp8,fp8,0,0.034215998649597165
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,40,4,128,1,float16,float16,0,0.03359679877758026
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,40,4,128,1,float16,fp8,0,0.03508639931678772
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,40,4,128,1,fp8,fp8,0,0.034822401404380796
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,40,8,128,1,float16,float16,0,0.038631999492645265
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,40,8,128,1,fp8,fp8,0,0.039340800046920775
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,40,40,128,1,float16,float16,0,0.04550400078296661
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,40,40,128,1,float16,fp8,0,0.04501599967479706
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,40,40,128,1,fp8,fp8,0,0.0448415994644165
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,40,1,128,1,float16,float16,0,0.027054399251937866
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,40,1,128,1,float16,fp8,0,0.028281599283218384
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,40,4,128,1,float16,fp8,0,0.028564798831939697
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,40,1,128,1,fp8,fp8,0,0.028331199288368226
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,40,2,128,1,float16,float16,0,0.02714720070362091
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,40,2,128,1,float16,fp8,0,0.028460800647735596
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,40,2,128,1,fp8,fp8,0,0.028424000740051268
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,40,4,128,1,float16,float16,0,0.027569600939750673
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,40,4,128,1,fp8,fp8,0,0.028627198934555054
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,40,8,128,1,float16,float16,0,0.028417599201202393
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,40,8,128,1,float16,fp8,0,0.029467201232910155
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,40,8,128,1,fp8,fp8,0,0.02953920066356659
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,40,40,128,1,float16,float16,0,0.028803199529647827
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,40,40,128,1,float16,fp8,0,0.029230400919914246
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,40,40,128,1,fp8,fp8,0,0.029422399401664735
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,40,1,128,1,float16,float16,0,0.02157119959592819
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,40,1,128,1,float16,fp8,0,0.022191999852657317
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,40,1,128,1,fp8,fp8,0,0.022103999555110932
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,40,2,128,1,float16,float16,0,0.02186879962682724
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,40,2,128,1,float16,fp8,0,0.02211039960384369
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,40,2,128,1,fp8,fp8,0,0.022679999470710754
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,40,4,128,1,float16,float16,0,0.021657599508762358
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,40,4,128,1,float16,fp8,0,0.022278399765491487
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,40,4,128,1,fp8,fp8,0,0.0226160004734993
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,40,8,128,1,float16,float16,0,0.022294400632381438
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,40,8,128,1,float16,fp8,0,0.02290239930152893
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,40,8,128,1,fp8,fp8,0,0.022551999986171724
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,40,40,128,1,float16,float16,0,0.02303680032491684
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,40,40,128,1,float16,fp8,0,0.02406879961490631
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,40,40,128,1,fp8,fp8,0,0.02348320037126541
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,40,1,128,1,float16,float16,0,0.02019519954919815
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,40,1,128,1,float16,fp8,0,0.02115679979324341
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,40,1,128,1,fp8,fp8,0,0.02147199958562851
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,40,2,128,1,float16,float16,0,0.020632000267505647
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,40,2,128,1,float16,fp8,0,0.021558399498462676
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,40,2,128,1,fp8,fp8,0,0.021089600026607515
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,40,4,128,1,float16,float16,0,0.02081120014190674
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,40,4,128,1,float16,fp8,0,0.021352000534534454
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,40,4,128,1,fp8,fp8,0,0.021376000344753267
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,40,8,128,1,float16,float16,0,0.02080159932374954
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,40,8,128,1,float16,fp8,0,0.021648000180721282
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,40,8,128,1,fp8,fp8,0,0.02173759937286377
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,40,1,128,1,float16,float16,0,0.24532480239868165
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,40,1,128,1,float16,fp8,0,0.2640223979949951
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,40,1,128,1,fp8,fp8,0,0.26507999897003176
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,40,2,128,1,float16,float16,0,0.26480960845947266
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,40,2,128,1,float16,fp8,0,0.2836352109909058
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,40,2,128,1,fp8,fp8,0,0.2847935914993286
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,40,4,128,1,float16,float16,0,0.2979887962341309
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,40,4,128,1,float16,fp8,0,0.31825120449066163
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,40,4,128,1,fp8,fp8,0,0.3196271896362305
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,40,8,128,1,float16,float16,0,0.40107998847961424
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,40,8,128,1,float16,fp8,0,0.4241055965423584
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,40,8,128,1,fp8,fp8,0,0.42299838066101075
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,40,40,128,1,float16,float16,0,0.5683216094970703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,40,40,128,1,float16,fp8,0,0.5794447898864746
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,40,1,128,1,float16,float16,0,0.13513760566711425
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,40,1,128,1,float16,fp8,0,0.1466912031173706
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,40,1,128,1,fp8,fp8,0,0.14664959907531738
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,40,40,128,1,fp8,fp8,0,0.5780735969543457
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,40,2,128,1,float16,float16,0,0.14320800304412842
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,40,4,128,1,fp8,fp8,0,0.17359999418258668
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,40,2,128,1,float16,fp8,0,0.1555567979812622
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,40,2,128,1,fp8,fp8,0,0.15525280237197875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,40,4,128,1,float16,float16,0,0.16180319786071778
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,40,4,128,1,float16,fp8,0,0.17336640357971192
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,40,8,128,1,float16,float16,0,0.2115855932235718
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,40,8,128,1,float16,fp8,0,0.22451999187469482
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,40,8,128,1,fp8,fp8,0,0.22497279644012452
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,40,40,128,1,float16,float16,0,0.295796799659729
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,40,40,128,1,float16,fp8,0,0.30173120498657224
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,40,1,128,1,float16,float16,0,0.08003519773483277
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,40,40,128,1,fp8,fp8,0,0.30220320224761965
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,40,1,128,1,float16,fp8,0,0.08313279747962951
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,40,1,128,1,fp8,fp8,0,0.08327040076255798
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,40,2,128,1,float16,float16,0,0.08569759726524354
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,40,2,128,1,float16,fp8,0,0.08916640281677246
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,40,2,128,1,fp8,fp8,0,0.0896511971950531
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,40,4,128,1,float16,float16,0,0.0937279999256134
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,40,4,128,1,float16,fp8,0,0.09722560048103332
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,40,4,128,1,fp8,fp8,0,0.09727839827537536
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,40,8,128,1,float16,float16,0,0.11930400133132935
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,40,8,128,1,float16,fp8,0,0.1232416033744812
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,40,8,128,1,fp8,fp8,0,0.12329440116882324
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,40,40,128,1,float16,float16,0,0.15981600284576417
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,40,40,128,1,float16,fp8,0,0.1606592059135437
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,40,40,128,1,fp8,fp8,0,0.16040639877319335
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,40,1,128,1,float16,float16,0,0.047563201189041136
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,40,1,128,1,float16,fp8,0,0.04631839990615845
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,40,1,128,1,fp8,fp8,0,0.04520959854125976
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,40,2,128,1,float16,float16,0,0.04970400035381317
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,40,2,128,1,float16,fp8,0,0.0475600004196167
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,40,2,128,1,fp8,fp8,0,0.047742399573326114
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,40,4,128,1,float16,float16,0,0.05713440179824829
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,40,4,128,1,float16,fp8,0,0.05360320210456848
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,40,4,128,1,fp8,fp8,0,0.054046398401260375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,40,8,128,1,float16,float16,0,0.06912320256233215
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,40,8,128,1,float16,fp8,0,0.0651199996471405
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,40,8,128,1,fp8,fp8,0,0.06779680252075196
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,40,40,128,1,float16,float16,0,0.08999519944190978
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,40,40,128,1,float16,fp8,0,0.0813759982585907
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,40,40,128,1,fp8,fp8,0,0.08217120170593262
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,40,1,128,1,float16,float16,0,0.031913599371910094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,40,1,128,1,float16,fp8,0,0.033847999572753903
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,40,1,128,1,fp8,fp8,0,0.03377760052680969
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,40,2,128,1,float16,float16,0,0.032123199105262755
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,40,2,128,1,float16,fp8,0,0.03412800133228302
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,40,2,128,1,fp8,fp8,0,0.03401280045509338
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,40,4,128,1,float16,float16,0,0.03332639932632446
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,40,4,128,1,float16,fp8,0,0.035104000568389894
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,40,4,128,1,fp8,fp8,0,0.03498240113258362
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,40,8,128,1,float16,float16,0,0.03775359988212586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,40,8,128,1,float16,fp8,0,0.039505600929260254
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,40,8,128,1,fp8,fp8,0,0.039452800154685976
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,40,40,128,1,float16,float16,0,0.048502400517463684
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,40,40,128,1,float16,fp8,0,0.04919520020484924
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,40,40,128,1,fp8,fp8,0,0.04902079999446869
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,40,1,128,1,float16,float16,0,0.02699359953403473
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,40,1,128,1,float16,fp8,0,0.028228801488876343
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,40,1,128,1,fp8,fp8,0,0.0282831996679306
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,40,2,128,1,float16,float16,0,0.027105599641799927
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,40,2,128,1,float16,fp8,0,0.02826879918575287
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,40,2,128,1,fp8,fp8,0,0.02857759892940521
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,40,4,128,1,float16,float16,0,0.027544000744819643
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,40,4,128,1,float16,fp8,0,0.02852480113506317
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,40,4,128,1,fp8,fp8,0,0.028537601232528687
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,40,8,128,1,float16,float16,0,0.028140801191329955
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,40,8,128,1,float16,fp8,0,0.02946079969406128
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,40,1,128,1,float16,fp8,0,0.022433599829673766
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,40,8,128,1,fp8,fp8,0,0.029782399535179138
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,40,40,128,1,float16,float16,0,0.031865599751472476
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,40,40,128,1,float16,fp8,0,0.03350079953670502
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,40,40,128,1,fp8,fp8,0,0.03349120020866394
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,40,1,128,1,float16,float16,0,0.021489599347114564
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,40,1,128,1,fp8,fp8,0,0.022254399955272675
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,40,2,128,1,float16,float16,0,0.021745599806308746
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,40,2,128,1,float16,fp8,0,0.02236640006303787
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,40,2,128,1,fp8,fp8,0,0.022294400632381438
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,40,4,128,1,float16,float16,0,0.021414400637149812
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,40,4,128,1,float16,fp8,0,0.02242400050163269
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,40,4,128,1,fp8,fp8,0,0.022526399791240694
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,40,8,128,1,float16,float16,0,0.02194720059633255
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,40,8,128,1,float16,fp8,0,0.022494399547576906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,40,8,128,1,fp8,fp8,0,0.022750400006771088
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,40,40,128,1,float16,float16,0,0.02294880002737045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,40,40,128,1,float16,fp8,0,0.02346239984035492
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,40,40,128,1,fp8,fp8,0,0.023321600258350374
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,40,1,128,1,float16,float16,0,0.02033279985189438
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,40,1,128,1,float16,fp8,0,0.021166400611400606
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,40,1,128,1,fp8,fp8,0,0.021180799603462218
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,40,2,128,1,float16,float16,0,0.020535999536514284
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,40,2,128,1,float16,fp8,0,0.021185599267482758
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,40,2,128,1,fp8,fp8,0,0.021315200626850127
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,40,4,128,1,float16,float16,0,0.020688000321388244
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,40,4,128,1,float16,fp8,0,0.021456000208854676
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,40,4,128,1,fp8,fp8,0,0.02138720005750656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,40,40,128,1,float16,fp8,0,0.021595199406147004
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,40,8,128,1,float16,float16,0,0.020795199275016784
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,40,8,128,1,float16,fp8,0,0.021537600457668303
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,40,8,128,1,fp8,fp8,0,0.02192319929599762
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,40,40,128,1,float16,float16,0,0.020662400126457214
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,40,40,128,1,fp8,fp8,0,0.021648000180721282
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,40,1,128,1,float16,float16,0,0.020073600113391876
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,40,1,128,1,float16,fp8,0,0.02085919976234436
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,40,1,128,1,fp8,fp8,0,0.02072480022907257
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,40,2,128,1,float16,float16,0,0.02006240040063858
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,40,2,128,1,float16,fp8,0,0.02083040028810501
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,40,2,128,1,fp8,fp8,0,0.020848000049591066
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,40,4,128,1,float16,float16,0,0.020080000162124634
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,40,4,128,1,float16,fp8,0,0.021012799441814424
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,40,4,128,1,fp8,fp8,0,0.020878399908542632
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,40,8,128,1,float16,float16,0,0.020641599595546723
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,40,8,128,1,float16,fp8,0,0.021273599565029146
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,40,8,128,1,fp8,fp8,0,0.020814399421215057
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,40,1,128,1,float16,float16,0,0.13455519676208497
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,40,2,128,1,fp8,fp8,0,0.1559216022491455
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,40,1,128,1,float16,fp8,0,0.1481760025024414
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,40,1,128,1,fp8,fp8,0,0.14847359657287598
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,40,2,128,1,float16,float16,0,0.14370399713516235
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,40,2,128,1,float16,fp8,0,0.15606720447540284
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,40,4,128,1,float16,float16,0,0.17679200172424317
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,40,4,128,1,float16,fp8,0,0.18958719968795776
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,40,4,128,1,fp8,fp8,0,0.18966399431228637
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,40,8,128,1,float16,float16,0,0.23700640201568604
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,40,8,128,1,float16,fp8,0,0.2555504083633423
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,40,8,128,1,fp8,fp8,0,0.2559583902359009
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,40,1,128,1,fp8,fp8,0,0.08411999940872192
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,40,40,128,1,float16,float16,0,0.3714063882827759
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,40,40,128,1,float16,fp8,0,0.39153759479522704
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,40,1,128,1,float16,float16,0,0.07936959862709045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,40,40,128,1,fp8,fp8,0,0.39089279174804686
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,40,1,128,1,float16,fp8,0,0.08407040238380432
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,40,2,128,1,float16,float16,0,0.08574560284614563
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,40,2,128,1,float16,fp8,0,0.09057279825210571
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,40,2,128,1,fp8,fp8,0,0.08993759751319885
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,40,4,128,1,float16,float16,0,0.10034079551696777
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,40,4,128,1,float16,fp8,0,0.10561920404434204
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,40,4,128,1,fp8,fp8,0,0.10601600408554077
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,40,8,128,1,float16,float16,0,0.13157119750976562
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,40,8,128,1,float16,fp8,0,0.14041119813919067
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,40,8,128,1,fp8,fp8,0,0.14100799560546876
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,40,40,128,1,float16,float16,0,0.19857439994812012
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,40,40,128,1,float16,fp8,0,0.20698399543762208
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,40,40,128,1,fp8,fp8,0,0.20708959102630614
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,40,1,128,1,float16,float16,0,0.047275200486183167
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,40,1,128,1,float16,fp8,0,0.04639039933681488
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,40,4,128,1,float16,fp8,0,0.057999998331069946
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,40,1,128,1,fp8,fp8,0,0.045921599864959715
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,40,2,128,1,float16,float16,0,0.049511998891830444
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,40,2,128,1,float16,fp8,0,0.04704799950122833
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,40,2,128,1,fp8,fp8,0,0.04731839895248413
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,40,40,128,1,float16,float16,0,0.10908319950103759
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,40,4,128,1,float16,float16,0,0.0607695996761322
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,40,40,128,1,fp8,fp8,0,0.10565279722213745
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,40,4,128,1,fp8,fp8,0,0.05856800079345703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,40,8,128,1,float16,float16,0,0.07624639868736267
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,40,8,128,1,float16,fp8,0,0.07553920149803162
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,40,8,128,1,fp8,fp8,0,0.0763264000415802
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,40,40,128,1,float16,fp8,0,0.1050927996635437
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,40,1,128,1,float16,float16,0,0.03211039900779724
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,40,1,128,1,float16,fp8,0,0.034113600850105286
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,40,1,128,1,fp8,fp8,0,0.03407680094242096
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,40,2,128,1,float16,float16,0,0.032390400767326355
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,40,2,128,1,float16,fp8,0,0.03447040021419525
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,40,2,128,1,fp8,fp8,0,0.03428640067577362
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,40,4,128,1,float16,float16,0,0.03353280127048493
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,40,4,128,1,float16,fp8,0,0.03515360057353974
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,40,4,128,1,fp8,fp8,0,0.03511680066585541
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,40,8,128,1,float16,float16,0,0.04253279864788055
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,40,8,128,1,float16,fp8,0,0.04437119960784912
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,40,8,128,1,fp8,fp8,0,0.0441536009311676
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,40,40,128,1,float16,float16,0,0.055904000997543335
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,40,40,128,1,float16,fp8,0,0.0582863986492157
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,40,40,128,1,fp8,fp8,0,0.058139199018478395
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,40,1,128,1,float16,float16,0,0.02710399925708771
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,40,1,128,1,float16,fp8,0,0.028191998600959778
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,40,1,128,1,fp8,fp8,0,0.028240001201629637
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,40,2,128,1,float16,float16,0,0.026867198944091796
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,40,2,128,1,float16,fp8,0,0.028089600801467895
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,40,2,128,1,fp8,fp8,0,0.02836799919605255
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,40,4,128,1,float16,float16,0,0.02738719880580902
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,40,4,128,1,float16,fp8,0,0.02885279953479767
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,40,4,128,1,fp8,fp8,0,0.028638398647308348
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,40,8,128,1,float16,float16,0,0.028092798590660096
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,40,8,128,1,float16,fp8,0,0.029743999242782593
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,40,8,128,1,fp8,fp8,0,0.02951200008392334
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,40,40,128,1,float16,float16,0,0.03612799942493439
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,40,40,128,1,float16,fp8,0,0.03809280097484589
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,40,40,128,1,fp8,fp8,0,0.03813120126724243
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,40,1,128,1,float16,float16,0,0.021512000262737273
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,40,1,128,1,float16,fp8,0,0.022383999824523926
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,40,1,128,1,fp8,fp8,0,0.022332799434661866
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,40,2,128,1,float16,float16,0,0.021587200462818146
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,40,2,128,1,float16,fp8,0,0.022619199752807618
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,40,2,128,1,fp8,fp8,0,0.022499200701713563
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,40,4,128,1,float16,float16,0,0.021793599426746368
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,40,4,128,1,float16,fp8,0,0.022342400252819063
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,40,4,128,1,fp8,fp8,0,0.022495999932289124
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,40,8,128,1,float16,float16,0,0.021939200162887574
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,40,8,128,1,float16,fp8,0,0.022814400494098663
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,40,8,128,1,fp8,fp8,0,0.022840000689029694
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,40,40,128,1,float16,float16,0,0.026331201195716858
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,40,40,128,1,float16,fp8,0,0.02776640057563782
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,40,40,128,1,fp8,fp8,0,0.027603200078010558
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,40,1,128,1,float16,float16,0,0.020520000159740447
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,40,1,128,1,float16,fp8,0,0.021267199516296388
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,40,1,128,1,fp8,fp8,0,0.021422399580478667
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,40,2,128,1,float16,float16,0,0.02050720006227493
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,40,2,128,1,float16,fp8,0,0.021403199434280394
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,40,2,128,1,fp8,fp8,0,0.021561600267887115
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,40,4,128,1,float16,float16,0,0.02067999988794327
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,40,4,128,1,float16,fp8,0,0.02128479927778244
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,40,4,128,1,fp8,fp8,0,0.021715199947357176
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,40,8,128,1,float16,float16,0,0.020844799280166627
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,40,8,128,1,float16,fp8,0,0.02157440036535263
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,40,8,128,1,fp8,fp8,0,0.021558399498462676
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,40,40,128,1,float16,float16,0,0.020891200006008147
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,40,40,128,1,float16,fp8,0,0.02168000042438507
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,40,40,128,1,fp8,fp8,0,0.02160000056028366
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,40,1,128,1,float16,float16,0,0.019950400292873382
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,40,1,128,1,float16,fp8,0,0.020814399421215057
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,40,1,128,1,fp8,fp8,0,0.02080959975719452
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,40,2,128,1,float16,float16,0,0.020252799987792967
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,40,2,128,1,float16,fp8,0,0.020737600326538087
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,40,2,128,1,fp8,fp8,0,0.02065120041370392
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,40,4,128,1,float16,float16,0,0.019886399805545806
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,40,4,128,1,float16,fp8,0,0.021030400693416596
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,40,4,128,1,fp8,fp8,0,0.021014399826526642
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,40,8,128,1,float16,float16,0,0.020239999890327452
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,40,8,128,1,float16,fp8,0,0.021300800144672394
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,40,8,128,1,fp8,fp8,0,0.02114879935979843
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,40,40,128,1,float16,float16,0,0.02040800005197525
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,40,40,128,1,float16,fp8,0,0.020904000103473663
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,40,40,128,1,fp8,fp8,0,0.021054400503635405
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,40,1,128,1,float16,float16,0,0.01945279985666275
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,40,1,128,1,float16,fp8,0,0.020076799392700195
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,40,1,128,1,fp8,fp8,0,0.02038560062646866
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,40,2,128,1,float16,float16,0,0.01961279958486557
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,40,2,128,1,float16,fp8,0,0.020555199682712556
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,40,2,128,1,fp8,fp8,0,0.020300799608230592
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,40,4,128,1,float16,float16,0,0.01971679925918579
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,40,4,128,1,float16,fp8,0,0.02038400024175644
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,40,4,128,1,fp8,fp8,0,0.020657600462436677
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,40,8,128,1,float16,float16,0,0.01974720060825348
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,40,8,128,1,float16,fp8,0,0.02091519981622696
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,40,8,128,1,fp8,fp8,0,0.02064319998025894
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,40,1,128,1,float16,float16,0,0.08052319884300232
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,40,1,128,1,float16,fp8,0,0.0851967990398407
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,40,1,128,1,fp8,fp8,0,0.08520479798316956
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,40,2,128,1,float16,float16,0,0.09323359727859497
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,40,2,128,1,float16,fp8,0,0.09929119944572448
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,40,8,128,1,float16,fp8,0,0.14400479793548585
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,40,2,128,1,fp8,fp8,0,0.09907360076904297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,40,4,128,1,float16,float16,0,0.11361119747161866
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,40,4,128,1,float16,fp8,0,0.12394239902496337
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,40,4,128,1,fp8,fp8,0,0.1239967942237854
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,40,8,128,1,float16,float16,0,0.13308160305023192
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,40,8,128,1,fp8,fp8,0,0.1434208035469055
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,40,40,128,1,float16,float16,0,0.274945592880249
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,40,40,128,1,float16,fp8,0,0.3005408048629761
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,40,40,128,1,fp8,fp8,0,0.29856319427490235
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,40,1,128,1,float16,float16,0,0.04894720017910004
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,40,1,128,1,float16,fp8,0,0.047849598526954654
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,40,1,128,1,fp8,fp8,0,0.0466735988855362
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,40,2,128,1,float16,float16,0,0.05262399911880493
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,40,2,128,1,float16,fp8,0,0.05204799771308899
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,40,2,128,1,fp8,fp8,0,0.05228319764137268
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,40,4,128,1,float16,float16,0,0.06846399903297425
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,40,4,128,1,float16,fp8,0,0.06709439754486084
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,40,4,128,1,fp8,fp8,0,0.06642879843711853
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,40,8,128,1,float16,float16,0,0.07827839851379395
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,40,8,128,1,float16,fp8,0,0.07710080146789551
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,40,8,128,1,fp8,fp8,0,0.07626240253448487
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,40,40,128,1,float16,float16,0,0.14830559492111206
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,40,40,128,1,float16,fp8,0,0.15200639963150026
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,40,1,128,1,float16,float16,0,0.032436800003051755
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,40,40,128,1,fp8,fp8,0,0.15279359817504884
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,40,1,128,1,float16,fp8,0,0.0344543993473053
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,40,1,128,1,fp8,fp8,0,0.03449600040912628
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,40,2,128,1,float16,float16,0,0.03260799944400787
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,40,2,128,1,float16,fp8,0,0.03487359881401062
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,40,2,128,1,fp8,fp8,0,0.0349839985370636
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,40,4,128,1,float16,float16,0,0.03704800009727478
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,40,4,128,1,float16,fp8,0,0.040380799770355226
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,40,4,128,1,fp8,fp8,0,0.04035199880599975
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,40,40,128,1,float16,fp8,0,0.08121119737625122
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,40,8,128,1,float16,float16,0,0.0418287992477417
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,40,8,128,1,float16,fp8,0,0.0443231999874115
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,40,8,128,1,fp8,fp8,0,0.044443199038505556
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,40,40,128,1,float16,float16,0,0.07480480074882508
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,40,40,128,1,fp8,fp8,0,0.08182079792022705
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,40,1,128,1,float16,float16,0,0.02736639976501465
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,40,1,128,1,float16,fp8,0,0.02864319980144501
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,40,1,128,1,fp8,fp8,0,0.02860639989376068
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,40,2,128,1,float16,float16,0,0.02729920148849487
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,40,2,128,1,float16,fp8,0,0.02871679961681366
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,40,2,128,1,fp8,fp8,0,0.028960001468658448
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,40,4,128,1,float16,float16,0,0.027777600288391113
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,40,4,128,1,float16,fp8,0,0.029068800806999206
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,40,4,128,1,fp8,fp8,0,0.028951999545097352
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,40,8,128,1,float16,float16,0,0.02810240089893341
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,40,8,128,1,float16,fp8,0,0.02967039942741394
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,40,8,128,1,fp8,fp8,0,0.02964319884777069
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,40,40,128,1,float16,float16,0,0.04391840100288391
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,40,40,128,1,float16,fp8,0,0.04768800139427185
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,40,40,128,1,fp8,fp8,0,0.04748800098896026
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,40,1,128,1,float16,float16,0,0.021480000019073485
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,40,1,128,1,float16,fp8,0,0.022249600291252135
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,40,1,128,1,fp8,fp8,0,0.022312000393867493
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,40,2,128,1,float16,float16,0,0.021614399552345277
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,40,2,128,1,float16,fp8,0,0.02245279997587204
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,40,2,128,1,fp8,fp8,0,0.02252800017595291
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,40,4,128,1,float16,fp8,0,0.02231840044260025
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,40,4,128,1,float16,float16,0,0.021539199352264404
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,40,4,128,1,fp8,fp8,0,0.022284799814224245
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,40,8,128,1,float16,float16,0,0.021768000721931458
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,40,8,128,1,float16,fp8,0,0.022672000527381896
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,40,8,128,1,fp8,fp8,0,0.022521600127220154
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,40,40,128,1,float16,float16,0,0.030345600843429566
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,40,40,128,1,float16,fp8,0,0.032471999526023865
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,40,40,128,1,fp8,fp8,0,0.032380801439285276
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,40,1,128,1,float16,float16,0,0.0205375999212265
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,40,1,128,1,float16,fp8,0,0.021217599511146545
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,40,1,128,1,fp8,fp8,0,0.021143999695777894
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,40,2,128,1,float16,float16,0,0.020601600408554077
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,40,2,128,1,float16,fp8,0,0.021359999477863312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,40,2,128,1,fp8,fp8,0,0.021206399798393248
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,40,4,128,1,float16,float16,0,0.02064799964427948
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,40,4,128,1,float16,fp8,0,0.021624000370502473
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,40,4,128,1,fp8,fp8,0,0.021491199731826782
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,40,8,128,1,float16,float16,0,0.02043039947748184
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,40,8,128,1,float16,fp8,0,0.021476800739765167
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,40,8,128,1,fp8,fp8,0,0.02136480063199997
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,40,40,128,1,float16,float16,0,0.024553599953651428
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,40,40,128,1,float16,fp8,0,0.025838398933410646
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,40,40,128,1,fp8,fp8,0,0.0259552001953125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,40,1,128,1,float16,float16,0,0.01993599981069565
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,40,1,128,1,float16,fp8,0,0.020678399503231047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,40,1,128,1,fp8,fp8,0,0.020508800446987153
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,40,2,128,1,float16,float16,0,0.01988160014152527
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,40,2,128,1,float16,fp8,0,0.020953600108623505
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,40,2,128,1,fp8,fp8,0,0.02075680047273636
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,40,4,128,1,float16,float16,0,0.020136000216007234
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,40,4,128,1,float16,fp8,0,0.020875200629234314
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,40,4,128,1,fp8,fp8,0,0.021240000426769257
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,40,8,128,1,float16,float16,0,0.019988800585269927
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,40,8,128,1,float16,fp8,0,0.02075839936733246
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,40,8,128,1,fp8,fp8,0,0.020895999670028687
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,40,40,128,1,float16,float16,0,0.020707200467586517
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,40,40,128,1,float16,fp8,0,0.021206399798393248
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,40,40,128,1,fp8,fp8,0,0.021483199298381807
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,40,1,128,1,float16,float16,0,0.01929440051317215
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,40,1,128,1,float16,fp8,0,0.020499199628829956
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,40,1,128,1,fp8,fp8,0,0.020633600652217865
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,40,2,128,1,float16,float16,0,0.019734400510787963
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,40,2,128,1,float16,fp8,0,0.020520000159740447
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,40,2,128,1,fp8,fp8,0,0.02072640061378479
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,40,4,128,1,float16,float16,0,0.019947199523448943
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,40,4,128,1,float16,fp8,0,0.020708799362182617
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,40,4,128,1,fp8,fp8,0,0.020494399964809416
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,40,8,128,1,float16,float16,0,0.019985599815845488
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,40,8,128,1,float16,fp8,0,0.020623999834060668
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,40,8,128,1,fp8,fp8,0,0.02059040069580078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,40,40,128,1,float16,float16,0,0.020206399261951447
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,40,40,128,1,float16,fp8,0,0.02117439955472946
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,40,40,128,1,fp8,fp8,0,0.020929600298404693
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,40,1,128,1,float16,float16,0,0.01945440024137497
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,40,1,128,1,float16,fp8,0,0.020294399559497835
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,40,1,128,1,fp8,fp8,0,0.0205375999212265
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,40,2,128,1,float16,float16,0,0.019475199282169342
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,40,2,128,1,float16,fp8,0,0.02020000070333481
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,40,2,128,1,fp8,fp8,0,0.020263999700546265
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,40,4,128,1,float16,float16,0,0.01961279958486557
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,40,4,128,1,float16,fp8,0,0.020363199710845947
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,40,4,128,1,fp8,fp8,0,0.020360000431537628
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,40,8,128,1,float16,float16,0,0.019388799369335175
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,40,8,128,1,float16,fp8,0,0.020436799526214598
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,40,8,128,1,fp8,fp8,0,0.020304000377655028
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,40,1,128,1,float16,float16,0,0.029716798663139345
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,40,1,128,1,float16,fp8,0,0.03191519975662231
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,40,1,128,1,fp8,fp8,0,0.03160319924354553
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,40,2,128,1,float16,float16,0,0.03727520108222961
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,40,2,128,1,float16,fp8,0,0.040643200278282166
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,40,2,128,1,fp8,fp8,0,0.04060479998588562
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,40,4,128,1,float16,float16,0,0.051937597990036014
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,40,4,128,1,float16,fp8,0,0.058019202947616574
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,40,4,128,1,fp8,fp8,0,0.05853279829025269
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,40,8,128,1,float16,float16,0,0.0808143973350525
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,40,8,128,1,float16,fp8,0,0.09433760046958924
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,40,8,128,1,fp8,fp8,0,0.09422720074653626
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,40,40,128,1,float16,float16,0,0.1641535997390747
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,40,40,128,1,float16,fp8,0,0.19896160364151
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,40,1,128,1,float16,float16,0,0.02287680059671402
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,40,40,128,1,fp8,fp8,0,0.1980448007583618
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,40,1,128,1,float16,fp8,0,0.023721599578857423
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,40,1,128,1,fp8,fp8,0,0.023686400055885314
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,40,2,128,1,float16,float16,0,0.02625280022621155
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,40,2,128,1,float16,fp8,0,0.028172799944877626
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,40,2,128,1,fp8,fp8,0,0.028516799211502075
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,40,4,128,1,float16,float16,0,0.03442400097846985
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,40,4,128,1,float16,fp8,0,0.03704319894313812
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,40,4,128,1,fp8,fp8,0,0.03746080100536346
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,40,8,128,1,float16,float16,0,0.048614400625228885
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,40,40,128,1,fp8,fp8,0,0.10854719877243042
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,40,8,128,1,float16,fp8,0,0.055255997180938723
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,40,8,128,1,fp8,fp8,0,0.055262398719787595
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,40,40,128,1,float16,float16,0,0.09268640279769898
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,40,40,128,1,float16,fp8,0,0.10933279991149902
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,40,1,128,1,float16,float16,0,0.021129600703716278
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,40,1,128,1,float16,fp8,0,0.022033600509166716
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,40,1,128,1,fp8,fp8,0,0.021723200380802155
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,40,2,128,1,float16,float16,0,0.020982399582862854
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,40,2,128,1,float16,fp8,0,0.02229599952697754
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,40,2,128,1,fp8,fp8,0,0.022332799434661866
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,40,4,128,1,float16,float16,0,0.025167998671531678
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,40,4,128,1,float16,fp8,0,0.02672480046749115
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,40,4,128,1,fp8,fp8,0,0.026807999610900878
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,40,8,128,1,float16,float16,0,0.032583999633789065
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,40,8,128,1,float16,fp8,0,0.035601601004600525
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,40,8,128,1,fp8,fp8,0,0.035913598537445066
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,40,40,128,1,float16,float16,0,0.05480960011482239
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,40,40,128,1,float16,fp8,0,0.06286720037460328
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,40,40,128,1,fp8,fp8,0,0.06323360204696656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,40,1,128,1,float16,float16,0,0.019944000244140624
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,40,1,128,1,float16,fp8,0,0.0209647998213768
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,40,1,128,1,fp8,fp8,0,0.02083519995212555
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,40,2,128,1,float16,float16,0,0.02014240026473999
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,40,2,128,1,float16,fp8,0,0.021063999831676485
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,40,2,128,1,fp8,fp8,0,0.02120800018310547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,40,4,128,1,float16,float16,0,0.020633600652217865
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,40,4,128,1,float16,fp8,0,0.02160319983959198
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,40,4,128,1,fp8,fp8,0,0.021321600675582884
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,40,8,128,1,float16,float16,0,0.024582399427890776
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,40,8,128,1,float16,fp8,0,0.025956800580024718
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,40,8,128,1,fp8,fp8,0,0.02592160105705261
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,40,40,128,1,float16,float16,0,0.036051198840141296
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,40,40,128,1,float16,fp8,0,0.03967680037021637
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,40,40,128,1,fp8,fp8,0,0.03954240083694458
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,40,1,128,1,float16,float16,0,0.019380800426006317
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,40,1,128,1,float16,fp8,0,0.020339199900627138
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,40,1,128,1,fp8,fp8,0,0.020367999374866486
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,40,2,128,1,float16,float16,0,0.019249600172042847
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,40,2,128,1,float16,fp8,0,0.020446400344371795
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,40,2,128,1,fp8,fp8,0,0.02069759964942932
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,40,4,128,1,float16,float16,0,0.019857600331306458
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,40,4,128,1,float16,fp8,0,0.020656000077724456
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,40,4,128,1,fp8,fp8,0,0.020585599541664123
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,40,8,128,1,float16,float16,0,0.020283199846744537
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,40,8,128,1,float16,fp8,0,0.021193599700927733
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,40,8,128,1,fp8,fp8,0,0.02109439969062805
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,40,40,128,1,float16,float16,0,0.027747198939323425
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,40,40,128,1,float16,fp8,0,0.029937601089477538
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,40,40,128,1,fp8,fp8,0,0.030156800150871278
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,40,1,128,1,float16,float16,0,0.019124799966812135
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,40,1,128,1,float16,fp8,0,0.02008959949016571
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,40,1,128,1,fp8,fp8,0,0.020033599436283113
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,40,2,128,1,float16,float16,0,0.019393600523471832
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,40,2,128,1,float16,fp8,0,0.02018879950046539
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,40,2,128,1,fp8,fp8,0,0.02008800059556961
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,40,4,128,1,float16,float16,0,0.019463999569416045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,40,4,128,1,float16,fp8,0,0.020654399693012238
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,40,4,128,1,fp8,fp8,0,0.02062239944934845
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,40,1,128,1,float16,float16,0,0.01913599967956543
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,40,8,128,1,float16,float16,0,0.01993280053138733
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,40,8,128,1,float16,fp8,0,0.020659199357032774
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,40,8,128,1,fp8,fp8,0,0.020798400044441223
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,40,40,128,1,float16,float16,0,0.023419199883937834
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,40,40,128,1,float16,fp8,0,0.02476159930229187
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,40,40,128,1,fp8,fp8,0,0.024857600033283234
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,40,1,128,1,fp8,fp8,0,0.020076799392700195
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,40,1,128,1,float16,fp8,0,0.019939200580120088
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,40,2,128,1,float16,float16,0,0.019193600118160247
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,40,2,128,1,float16,fp8,0,0.019764800369739533
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,40,2,128,1,fp8,fp8,0,0.019952000677585603
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,40,4,128,1,float16,float16,0,0.0191103994846344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,40,4,128,1,float16,fp8,0,0.019926400482654573
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,40,4,128,1,fp8,fp8,0,0.020047999918460846
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,40,8,128,1,float16,float16,0,0.019356800615787505
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,40,8,128,1,float16,fp8,0,0.020563200116157532
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,40,8,128,1,fp8,fp8,0,0.020231999456882477
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,40,40,128,1,float16,float16,0,0.01969279944896698
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,40,40,128,1,float16,fp8,0,0.02045599967241287
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,40,40,128,1,fp8,fp8,0,0.02027679979801178
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,40,1,128,1,float16,float16,0,0.01698880046606064
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,40,1,128,1,float16,fp8,0,0.01801439970731735
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,40,1,128,1,fp8,fp8,0,0.018059200048446654
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,40,2,128,1,float16,float16,0,0.019092799723148347
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,40,2,128,1,float16,fp8,0,0.019494399428367615
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,40,2,128,1,fp8,fp8,0,0.019630399346351624
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,40,4,128,1,float16,float16,0,0.01897760033607483
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,40,4,128,1,float16,fp8,0,0.019817599654197694
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,40,4,128,1,fp8,fp8,0,0.019952000677585603
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,40,8,128,1,float16,float16,0,0.01913599967956543
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,40,8,128,1,float16,fp8,0,0.020094400644302367
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,40,8,128,1,fp8,fp8,0,0.019827200472354888
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,40,40,128,1,float16,float16,0,0.01924159973859787
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,40,40,128,1,float16,fp8,0,0.02006080001592636
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,40,40,128,1,fp8,fp8,0,0.02006399929523468
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,40,1,128,1,float16,float16,0,0.016361600160598753
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,40,1,128,1,float16,fp8,0,0.017292800545692443
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,40,1,128,1,fp8,fp8,0,0.017051200568675994
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,40,2,128,1,float16,float16,0,0.01704320013523102
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,40,2,128,1,float16,fp8,0,0.0177279993891716
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,40,2,128,1,fp8,fp8,0,0.01791519969701767
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,40,4,128,1,float16,float16,0,0.018643200397491455
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,40,4,128,1,float16,fp8,0,0.019467200338840484
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,40,4,128,1,fp8,fp8,0,0.019734400510787963
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,40,8,128,1,float16,float16,0,0.01881439983844757
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,40,8,128,1,float16,fp8,0,0.019808000326156615
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,40,8,128,1,fp8,fp8,0,0.01966399997472763
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,32,1,128,1,fp8,fp8,0,25.326434326171874
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,32,1,128,1,float16,fp8,0,25.997442626953124
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,32,2,128,1,float16,fp8,0,25.83448486328125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,32,2,128,1,fp8,fp8,0,25.75057373046875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,32,4,128,1,float16,fp8,0,26.03045654296875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,32,1,128,1,float16,float16,0,30.385076904296874
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,32,2,128,1,float16,float16,0,30.742523193359375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,32,4,128,1,float16,float16,0,30.818789672851562
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,32,32,128,1,float16,fp8,0,15.043533325195312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,32,32,128,1,float16,float16,0,18.314369201660156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,32,32,128,1,fp8,fp8,0,15.317041015625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,32,1,128,1,float16,float16,0,15.863542175292968
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,32,4,128,1,fp8,fp8,0,26.239251708984376
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,32,8,128,1,float16,fp8,0,27.336203002929686
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,32,8,128,1,fp8,fp8,0,27.283053588867187
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,32,8,128,1,float16,float16,0,32.82998046875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,32,1,128,1,float16,fp8,0,12.837115478515624
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,32,1,128,1,fp8,fp8,0,12.821638488769532
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,32,2,128,1,float16,fp8,0,12.873580932617188
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,32,2,128,1,float16,float16,0,15.524822998046876
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,32,2,128,1,fp8,fp8,0,12.723115539550781
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,32,4,128,1,float16,fp8,0,13.039710998535156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,32,4,128,1,float16,float16,0,15.927838134765626
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,32,32,128,1,float16,float16,0,9.023694610595703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,32,4,128,1,fp8,fp8,0,13.241062927246094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,32,8,128,1,float16,fp8,0,13.303663635253907
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,32,32,128,1,float16,fp8,0,7.4150848388671875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,32,8,128,1,fp8,fp8,0,13.27008056640625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,32,8,128,1,float16,float16,0,16.45630645751953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,32,32,128,1,fp8,fp8,0,7.548115539550781
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,32,1,128,1,float16,float16,0,7.9107200622558596
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,32,1,128,1,float16,fp8,0,6.476351928710938
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,32,1,128,1,fp8,fp8,0,6.569942474365234
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,32,2,128,1,float16,fp8,0,6.356455993652344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,32,2,128,1,float16,float16,0,7.9798126220703125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,32,2,128,1,fp8,fp8,0,6.48966064453125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,32,4,128,1,float16,fp8,0,6.569647979736328
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,32,4,128,1,float16,float16,0,7.896038055419922
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,32,4,128,1,fp8,fp8,0,6.525899505615234
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,32,32,128,1,float16,fp8,0,3.7266944885253905
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,32,32,128,1,float16,float16,0,4.329872131347656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,32,8,128,1,float16,fp8,0,6.737886047363281
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,32,8,128,1,float16,float16,0,7.920954895019531
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,32,32,128,1,fp8,fp8,0,3.716579055786133
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,32,8,128,1,fp8,fp8,0,6.554595184326172
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,32,1,128,1,float16,float16,0,3.705614471435547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,32,1,128,1,float16,fp8,0,3.172108840942383
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,32,1,128,1,fp8,fp8,0,3.161564826965332
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,32,2,128,1,float16,float16,0,3.794657516479492
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,32,2,128,1,float16,fp8,0,3.2393215179443358
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,32,4,128,1,float16,fp8,0,3.2102176666259767
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,32,2,128,1,fp8,fp8,0,3.2542015075683595
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,32,4,128,1,fp8,fp8,0,3.306273651123047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,32,4,128,1,float16,float16,0,3.6866527557373048
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,32,8,128,1,float16,float16,0,3.8842750549316407
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,32,8,128,1,float16,fp8,0,3.2844001770019533
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,32,8,128,1,fp8,fp8,0,3.3675151824951173
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,32,1,128,1,float16,fp8,0,14.416079711914062
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,32,1,128,1,fp8,fp8,0,14.648809814453125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,32,2,128,1,float16,fp8,0,14.703073120117187
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,32,2,128,1,fp8,fp8,0,14.790823364257813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,32,1,128,1,float16,float16,0,18.363200378417968
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,32,4,128,1,float16,fp8,0,14.83472900390625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,32,2,128,1,float16,float16,0,17.57802734375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,32,4,128,1,float16,float16,0,17.726812744140624
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,32,32,128,1,float16,fp8,0,8.835208129882812
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,32,32,128,1,float16,float16,0,10.751663970947266
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,32,32,128,1,fp8,fp8,0,8.853463745117187
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,32,4,128,1,fp8,fp8,0,14.77544708251953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,32,8,128,1,float16,fp8,0,15.477339172363282
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,32,8,128,1,fp8,fp8,0,15.764845275878907
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,32,8,128,1,float16,float16,0,18.867564392089843
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,32,1,128,1,float16,fp8,0,7.242575836181641
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,32,1,128,1,float16,float16,0,8.492005157470704
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,32,1,128,1,fp8,fp8,0,7.377705383300781
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,32,2,128,1,float16,fp8,0,7.487611389160156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,32,2,128,1,float16,float16,0,8.963616180419923
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,32,2,128,1,fp8,fp8,0,7.5784141540527346
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,32,4,128,1,float16,fp8,0,7.536211395263672
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,32,4,128,1,float16,float16,0,9.188120269775391
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,32,4,128,1,fp8,fp8,0,7.487840270996093
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,32,32,128,1,float16,float16,0,5.063678359985351
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,32,8,128,1,float16,fp8,0,7.758477020263672
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,32,32,128,1,float16,fp8,0,4.435457611083985
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,32,8,128,1,float16,float16,0,9.040184020996094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,32,8,128,1,fp8,fp8,0,7.557371520996094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,32,1,128,1,float16,fp8,0,3.604497528076172
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,32,32,128,1,fp8,fp8,0,4.425868988037109
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,32,1,128,1,float16,float16,0,4.283832168579101
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,32,1,128,1,fp8,fp8,0,3.6472286224365233
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,32,2,128,1,float16,fp8,0,3.6524913787841795
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,32,2,128,1,float16,float16,0,4.424884796142578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,32,2,128,1,fp8,fp8,0,3.6960304260253904
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,32,4,128,1,float16,float16,0,4.3411102294921875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,32,4,128,1,float16,fp8,0,3.690875244140625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,32,4,128,1,fp8,fp8,0,3.7039520263671877
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,32,32,128,1,float16,fp8,0,2.239753532409668
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,32,32,128,1,float16,float16,0,2.6353679656982423
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,32,8,128,1,float16,fp8,0,3.803721618652344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,32,8,128,1,float16,float16,0,4.452179336547852
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,32,8,128,1,fp8,fp8,0,3.812483215332031
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,32,1,128,1,float16,float16,0,2.0104352951049806
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,32,32,128,1,fp8,fp8,0,2.483598327636719
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,32,1,128,1,float16,fp8,0,1.8162607192993163
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,32,1,128,1,fp8,fp8,0,1.811787223815918
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,32,2,128,1,float16,fp8,0,1.8299600601196289
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,32,2,128,1,float16,float16,0,2.108336067199707
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,32,2,128,1,fp8,fp8,0,1.8973983764648437
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,32,4,128,1,float16,float16,0,2.0689279556274416
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,32,4,128,1,float16,fp8,0,1.9742671966552734
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,32,4,128,1,fp8,fp8,0,1.8598272323608398
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,32,8,128,1,float16,float16,0,2.1561616897583007
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,32,8,128,1,float16,fp8,0,1.9090240478515625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,32,8,128,1,fp8,fp8,0,1.906937599182129
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,32,1,128,1,float16,fp8,0,10.300129699707032
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,32,1,128,1,fp8,fp8,0,10.283112335205079
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,32,2,128,1,float16,fp8,0,10.191120147705078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,32,2,128,1,fp8,fp8,0,10.408900451660156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,32,4,128,1,float16,fp8,0,10.387175750732421
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,32,2,128,1,float16,float16,0,12.190523529052735
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,32,1,128,1,float16,float16,0,12.918707275390625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,32,4,128,1,float16,float16,0,12.500138854980468
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,32,32,128,1,float16,fp8,0,6.5412353515625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,32,32,128,1,float16,float16,0,7.3368385314941404
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,32,32,128,1,fp8,fp8,0,6.5666259765625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,32,1,128,1,float16,float16,0,5.997977447509766
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,32,4,128,1,fp8,fp8,0,10.568606567382812
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,32,8,128,1,float16,fp8,0,10.708814239501953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,32,8,128,1,fp8,fp8,0,10.967388916015626
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,32,8,128,1,float16,float16,0,13.141770935058593
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,32,1,128,1,float16,fp8,0,5.107201766967774
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,32,1,128,1,fp8,fp8,0,5.084673690795898
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,32,2,128,1,float16,fp8,0,5.185784149169922
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,32,2,128,1,float16,float16,0,6.0623023986816404
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,32,2,128,1,fp8,fp8,0,5.152139282226562
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,32,4,128,1,float16,float16,0,6.139305496215821
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,32,4,128,1,float16,fp8,0,5.267111968994141
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,32,4,128,1,fp8,fp8,0,5.3600624084472654
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,32,32,128,1,float16,float16,0,3.732094573974609
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,32,32,128,1,float16,fp8,0,3.250019073486328
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,32,8,128,1,float16,fp8,0,5.383598327636719
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,32,8,128,1,float16,float16,0,6.397931289672852
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,32,8,128,1,fp8,fp8,0,5.379483032226562
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,32,1,128,1,float16,float16,0,3.0000511169433595
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,32,32,128,1,fp8,fp8,0,3.3788688659667967
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,32,1,128,1,float16,fp8,0,2.5512943267822266
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,32,1,128,1,fp8,fp8,0,2.6279584884643556
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,32,2,128,1,float16,float16,0,2.753727912902832
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,32,2,128,1,float16,fp8,0,2.568670463562012
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,32,2,128,1,fp8,fp8,0,2.8190095901489256
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,32,4,128,1,float16,float16,0,2.9912736892700194
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,32,4,128,1,float16,fp8,0,2.8137184143066407
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,32,4,128,1,fp8,fp8,0,2.6217039108276365
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,32,8,128,1,float16,float16,0,2.9675008773803713
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,32,32,128,1,float16,float16,0,1.7754512786865235
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,32,8,128,1,float16,fp8,0,2.707904052734375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,32,32,128,1,float16,fp8,0,1.6568944931030274
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,32,8,128,1,fp8,fp8,0,2.935073661804199
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,32,1,128,1,float16,float16,0,1.3972991943359374
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,32,1,128,1,float16,fp8,0,1.289367961883545
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,32,32,128,1,fp8,fp8,0,1.757975959777832
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,32,1,128,1,fp8,fp8,0,1.5555456161499024
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,32,2,128,1,float16,float16,0,1.4126879692077636
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,32,2,128,1,float16,fp8,0,1.3044431686401368
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,32,2,128,1,fp8,fp8,0,1.2992783546447755
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,32,4,128,1,float16,float16,0,1.4395312309265136
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,32,4,128,1,float16,fp8,0,1.3623984336853028
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,32,4,128,1,fp8,fp8,0,1.3206447601318358
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,32,8,128,1,float16,float16,0,1.4587136268615724
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,32,8,128,1,float16,fp8,0,1.3818911552429198
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,32,8,128,1,fp8,fp8,0,1.426414394378662
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,32,1,128,1,float16,fp8,0,13.356846618652344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,32,1,128,1,fp8,fp8,0,13.387937927246094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,32,2,128,1,float16,fp8,0,13.365902709960938
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,32,2,128,1,fp8,fp8,0,13.519296264648437
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,32,1,128,1,float16,float16,0,15.82340087890625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,32,2,128,1,float16,float16,0,15.847489929199218
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,32,4,128,1,float16,float16,0,16.181358337402344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,32,4,128,1,float16,fp8,0,13.926513671875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,32,32,128,1,float16,fp8,0,8.812838745117187
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,32,32,128,1,float16,float16,0,9.864374542236328
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,32,32,128,1,fp8,fp8,0,8.810340881347656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,32,4,128,1,fp8,fp8,0,13.939779663085938
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,32,8,128,1,float16,fp8,0,14.171501159667969
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,32,8,128,1,fp8,fp8,0,14.313032531738282
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,32,8,128,1,float16,float16,0,17.134844970703124
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,32,1,128,1,float16,float16,0,7.920162963867187
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,32,1,128,1,float16,fp8,0,6.619118499755859
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,32,1,128,1,fp8,fp8,0,6.736300659179688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,32,2,128,1,float16,float16,0,7.999420928955078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,32,2,128,1,float16,fp8,0,6.873390197753906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,32,2,128,1,fp8,fp8,0,6.906073760986328
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,32,4,128,1,float16,float16,0,8.70792007446289
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,32,4,128,1,fp8,fp8,0,6.967027282714843
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,32,4,128,1,float16,fp8,0,7.146663665771484
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,32,32,128,1,float16,float16,0,5.05785903930664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,32,8,128,1,float16,fp8,0,7.097430419921875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,32,32,128,1,float16,fp8,0,4.479462432861328
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,32,8,128,1,float16,float16,0,8.470873260498047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,32,8,128,1,fp8,fp8,0,7.092494201660156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,32,1,128,1,float16,fp8,0,3.3439537048339845
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,32,1,128,1,float16,float16,0,3.9141902923583984
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,32,32,128,1,fp8,fp8,0,4.623870468139648
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,32,1,128,1,fp8,fp8,0,3.392865753173828
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,32,2,128,1,float16,fp8,0,3.3566638946533205
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,32,2,128,1,float16,float16,0,3.835523223876953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,32,2,128,1,fp8,fp8,0,3.4098064422607424
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,32,4,128,1,float16,fp8,0,3.4278705596923826
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,32,4,128,1,float16,float16,0,3.921345520019531
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,32,4,128,1,fp8,fp8,0,3.9166831970214844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,32,8,128,1,float16,float16,0,4.028910446166992
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,32,32,128,1,float16,fp8,0,2.2208080291748047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,32,32,128,1,float16,float16,0,2.547657585144043
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,32,8,128,1,float16,fp8,0,3.5670337677001953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,32,8,128,1,fp8,fp8,0,3.551193618774414
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,32,1,128,1,float16,float16,0,1.8522144317626954
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,32,32,128,1,fp8,fp8,0,2.2108911514282226
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,32,1,128,1,float16,fp8,0,1.6851791381835937
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,32,1,128,1,fp8,fp8,0,1.7743648529052733
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,32,2,128,1,float16,float16,0,1.8184959411621093
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,32,2,128,1,float16,fp8,0,1.7430400848388672
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,32,2,128,1,fp8,fp8,0,1.7083375930786133
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,32,4,128,1,float16,float16,0,1.8869216918945313
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,32,4,128,1,float16,fp8,0,1.7942127227783202
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,32,4,128,1,fp8,fp8,0,1.7119888305664062
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,32,32,128,1,float16,float16,0,1.194718360900879
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,32,32,128,1,float16,fp8,0,1.1221776008605957
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,32,8,128,1,float16,fp8,0,1.8081680297851563
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,32,8,128,1,float16,float16,0,1.918756866455078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,32,8,128,1,fp8,fp8,0,1.8723119735717773
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,32,1,128,1,float16,float16,0,0.902131175994873
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,32,1,128,1,float16,fp8,0,0.847697639465332
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,32,32,128,1,fp8,fp8,0,1.2119199752807617
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,32,1,128,1,fp8,fp8,0,0.8597760200500488
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,32,2,128,1,float16,float16,0,0.9137855529785156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,32,2,128,1,float16,fp8,0,0.856172752380371
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,32,2,128,1,fp8,fp8,0,0.8749135971069336
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,32,4,128,1,float16,float16,0,0.9165424346923828
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,32,4,128,1,float16,fp8,0,0.9059616088867187
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,32,4,128,1,fp8,fp8,0,0.8754608154296875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,32,8,128,1,float16,float16,0,0.9830176353454589
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,32,8,128,1,float16,fp8,0,0.9107407569885254
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,32,8,128,1,fp8,fp8,0,0.9179167747497559
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,32,1,128,1,float16,fp8,0,7.845950317382813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,32,1,128,1,fp8,fp8,0,7.645428466796875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,32,1,128,1,float16,float16,0,9.049966430664062
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,32,2,128,1,float16,fp8,0,7.835665893554688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,32,2,128,1,fp8,fp8,0,7.813433837890625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,32,2,128,1,float16,float16,0,9.112750244140624
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,32,4,128,1,float16,fp8,0,8.035137939453126
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,32,4,128,1,float16,float16,0,9.355372619628906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,32,1,128,1,float16,float16,0,4.427670288085937
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,32,32,128,1,float16,fp8,0,5.576396942138672
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,32,32,128,1,float16,float16,0,6.058798217773438
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,32,32,128,1,fp8,fp8,0,5.482531356811523
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,32,4,128,1,fp8,fp8,0,8.096182250976563
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,32,8,128,1,float16,fp8,0,8.466702270507813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,32,8,128,1,fp8,fp8,0,8.435860443115235
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,32,8,128,1,float16,float16,0,9.797322845458984
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,32,1,128,1,float16,fp8,0,3.951558303833008
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,32,1,128,1,fp8,fp8,0,3.832868957519531
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,32,2,128,1,float16,fp8,0,3.9003311157226563
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,32,2,128,1,float16,float16,0,4.55407829284668
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,32,2,128,1,fp8,fp8,0,3.915264129638672
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,32,4,128,1,float16,fp8,0,3.9982513427734374
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,32,4,128,1,float16,float16,0,4.629619216918945
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,32,4,128,1,fp8,fp8,0,4.122391891479492
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,32,32,128,1,float16,float16,0,3.0399023056030274
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,32,8,128,1,float16,fp8,0,4.200763320922851
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,32,8,128,1,float16,float16,0,4.8041728973388675
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,32,8,128,1,fp8,fp8,0,4.190776062011719
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,32,1,128,1,float16,float16,0,2.0780031204223635
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,32,32,128,1,float16,fp8,0,2.753376007080078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,32,32,128,1,fp8,fp8,0,2.8920591354370115
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,32,1,128,1,float16,fp8,0,1.9956544876098632
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,32,1,128,1,fp8,fp8,0,1.9381311416625977
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,32,2,128,1,float16,float16,0,2.1878448486328126
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,32,2,128,1,float16,fp8,0,1.9561967849731445
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,32,2,128,1,fp8,fp8,0,2.0190256118774412
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,32,4,128,1,float16,float16,0,2.195308876037598
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,32,4,128,1,float16,fp8,0,2.01068000793457
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,32,4,128,1,fp8,fp8,0,2.019708824157715
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,32,32,128,1,float16,float16,0,1.5013824462890626
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,32,8,128,1,float16,float16,0,2.2998224258422852
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,32,8,128,1,float16,fp8,0,2.109000015258789
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,32,32,128,1,float16,fp8,0,1.3993247985839843
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,32,8,128,1,fp8,fp8,0,2.229419136047363
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,32,1,128,1,float16,float16,0,1.029190444946289
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,32,32,128,1,fp8,fp8,0,1.4787712097167969
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,32,1,128,1,float16,fp8,0,0.9803055763244629
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,32,1,128,1,fp8,fp8,0,0.9778191566467285
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,32,2,128,1,float16,float16,0,1.0759615898132324
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,32,2,128,1,float16,fp8,0,0.9918911933898926
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,32,2,128,1,fp8,fp8,0,0.9931280136108398
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,32,4,128,1,fp8,fp8,0,1.0237919807434082
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,32,4,128,1,float16,float16,0,1.0715359687805175
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,32,4,128,1,float16,fp8,0,1.0263903617858887
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,32,8,128,1,float16,float16,0,1.1348079681396483
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,32,8,128,1,float16,fp8,0,1.0681695938110352
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,32,32,128,1,float16,float16,0,0.7679999828338623
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,32,1,128,1,float16,fp8,0,0.4981264114379883
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,32,8,128,1,fp8,fp8,0,1.0814288139343262
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,32,32,128,1,float16,fp8,0,0.7135519981384277
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,32,32,128,1,fp8,fp8,0,0.7124063968658447
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,32,1,128,1,float16,float16,0,0.5260784149169921
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,32,1,128,1,fp8,fp8,0,0.5024960041046143
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,32,2,128,1,float16,float16,0,0.537936019897461
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,32,2,128,1,float16,fp8,0,0.510536003112793
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,32,2,128,1,fp8,fp8,0,0.5096464157104492
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,32,4,128,1,float16,float16,0,0.5519375801086426
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,32,4,128,1,float16,fp8,0,0.5219344139099121
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,32,4,128,1,fp8,fp8,0,0.5217567920684815
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,32,8,128,1,float16,float16,0,0.5755551815032959
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,32,8,128,1,float16,fp8,0,0.5495183944702149
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,32,8,128,1,fp8,fp8,0,0.5511856079101562
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,32,1,128,1,float16,fp8,0,7.22357406616211
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,32,1,128,1,fp8,fp8,0,7.200971221923828
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,32,1,128,1,float16,float16,0,8.475759887695313
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,32,2,128,1,float16,fp8,0,7.375730895996094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,32,2,128,1,fp8,fp8,0,7.32091064453125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,32,2,128,1,float16,float16,0,8.519966125488281
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,32,4,128,1,float16,fp8,0,7.630641937255859
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,32,4,128,1,float16,float16,0,8.875977325439454
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,32,1,128,1,float16,float16,0,4.122739028930664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,32,32,128,1,float16,fp8,0,5.859844970703125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,32,32,128,1,float16,float16,0,6.2478382110595705
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,32,32,128,1,fp8,fp8,0,5.828249740600586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,32,4,128,1,fp8,fp8,0,7.632266998291016
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,32,8,128,1,float16,fp8,0,8.227590179443359
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,32,8,128,1,fp8,fp8,0,8.148741149902344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,32,8,128,1,float16,float16,0,9.340795135498047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,32,1,128,1,float16,fp8,0,3.710094451904297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,32,1,128,1,fp8,fp8,0,3.6196239471435545
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,32,2,128,1,float16,fp8,0,3.684630584716797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,32,2,128,1,fp8,fp8,0,3.6931472778320313
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,32,2,128,1,float16,float16,0,4.199934387207032
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,32,4,128,1,float16,fp8,0,3.8097198486328123
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,32,4,128,1,float16,float16,0,4.421252822875976
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,32,4,128,1,fp8,fp8,0,3.8634334564208985
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,32,32,128,1,float16,float16,0,3.1017391204833986
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,32,8,128,1,float16,float16,0,4.632886505126953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,32,32,128,1,float16,fp8,0,2.9792207717895507
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,32,8,128,1,float16,fp8,0,4.0957489013671875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,32,1,128,1,float16,float16,0,2.0011648178100585
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,32,1,128,1,float16,fp8,0,1.824612808227539
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,32,8,128,1,fp8,fp8,0,4.102024078369141
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,32,32,128,1,fp8,fp8,0,2.9231216430664064
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,32,1,128,1,fp8,fp8,0,1.8435375213623046
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,32,2,128,1,float16,float16,0,2.0093488693237305
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,32,2,128,1,float16,fp8,0,1.8584112167358398
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,32,2,128,1,fp8,fp8,0,1.8409151077270507
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,32,4,128,1,float16,float16,0,2.132609558105469
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,32,4,128,1,float16,fp8,0,1.9336591720581056
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,32,4,128,1,fp8,fp8,0,1.9251344680786133
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,32,8,128,1,float16,float16,0,2.2315759658813477
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,32,8,128,1,float16,fp8,0,2.057102394104004
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,32,32,128,1,float16,float16,0,1.5531040191650392
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,32,1,128,1,float16,float16,0,0.9694656372070313
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,32,32,128,1,float16,fp8,0,1.4710240364074707
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,32,8,128,1,fp8,fp8,0,2.125662422180176
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,32,1,128,1,float16,fp8,0,0.9269295692443847
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,32,32,128,1,fp8,fp8,0,1.5511440277099608
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,32,1,128,1,fp8,fp8,0,1.0283727645874023
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,32,2,128,1,float16,float16,0,0.9838576316833496
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,32,2,128,1,float16,fp8,0,0.9468159675598145
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,32,2,128,1,fp8,fp8,0,0.9537872314453125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,32,4,128,1,float16,float16,0,1.0095696449279785
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,32,4,128,1,fp8,fp8,0,0.9764512062072754
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,32,4,128,1,float16,fp8,0,0.9769472122192383
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,32,8,128,1,float16,float16,0,1.0970687866210938
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,32,8,128,1,float16,fp8,0,1.0513216018676759
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,32,8,128,1,fp8,fp8,0,1.0490768432617188
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,32,32,128,1,float16,float16,0,0.7807536125183105
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,32,32,128,1,float16,fp8,0,0.7506688117980957
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,32,1,128,1,float16,float16,0,0.499783992767334
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,32,32,128,1,fp8,fp8,0,0.7505871772766113
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,32,1,128,1,float16,fp8,0,0.4758063793182373
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,32,1,128,1,fp8,fp8,0,0.4813392162322998
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,32,2,128,1,float16,float16,0,0.5031599998474121
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,32,2,128,1,float16,fp8,0,0.4846335887908936
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,32,2,128,1,fp8,fp8,0,0.4830912113189697
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,32,4,128,1,float16,float16,0,0.5271999835968018
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,32,8,128,1,float16,float16,0,0.5623504161834717
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,32,4,128,1,float16,fp8,0,0.5009568214416504
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,32,4,128,1,fp8,fp8,0,0.5002831935882568
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,32,8,128,1,float16,fp8,0,0.5361072063446045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,32,8,128,1,fp8,fp8,0,0.5371871948242187
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,32,32,128,1,float16,float16,0,0.402620792388916
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,32,32,128,1,float16,fp8,0,0.3878047943115234
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,32,32,128,1,fp8,fp8,0,0.38896799087524414
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,32,1,128,1,float16,float16,0,0.25868160724639894
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,32,1,128,1,float16,fp8,0,0.24886720180511473
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,32,1,128,1,fp8,fp8,0,0.2481328010559082
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,32,2,128,1,float16,float16,0,0.2663840055465698
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,32,2,128,1,float16,fp8,0,0.25509440898895264
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,32,2,128,1,fp8,fp8,0,0.25622398853302003
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,32,4,128,1,float16,float16,0,0.27463040351867674
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,32,8,128,1,fp8,fp8,0,0.28146240711212156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,32,4,128,1,float16,fp8,0,0.2617392063140869
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,32,4,128,1,fp8,fp8,0,0.2643791913986206
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,32,8,128,1,float16,float16,0,0.29103519916534426
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,32,8,128,1,float16,fp8,0,0.28318560123443604
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,32,1,128,1,float16,fp8,0,4.301768112182617
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,32,1,128,1,float16,float16,0,4.821688079833985
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,32,1,128,1,fp8,fp8,0,4.297897720336914
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,32,2,128,1,float16,fp8,0,4.373206329345703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,32,2,128,1,fp8,fp8,0,4.373320007324219
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,32,2,128,1,float16,float16,0,5.011579132080078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,32,4,128,1,float16,fp8,0,4.576222229003906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,32,4,128,1,float16,float16,0,5.132011032104492
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,32,1,128,1,float16,float16,0,2.398027229309082
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,32,4,128,1,fp8,fp8,0,4.587184143066406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,32,32,128,1,float16,float16,0,4.028814315795898
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,32,32,128,1,float16,fp8,0,3.9354896545410156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,32,8,128,1,float16,fp8,0,5.036145782470703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,32,32,128,1,fp8,fp8,0,3.808108901977539
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,32,8,128,1,float16,float16,0,5.528838348388672
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,32,8,128,1,fp8,fp8,0,5.024518585205078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,32,1,128,1,float16,fp8,0,2.22607364654541
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,32,1,128,1,fp8,fp8,0,2.1423776626586912
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,32,2,128,1,float16,fp8,0,2.2149343490600586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,32,2,128,1,float16,float16,0,2.4087072372436524
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,32,2,128,1,fp8,fp8,0,2.209491157531738
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,32,4,128,1,float16,float16,0,2.4467023849487304
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,32,4,128,1,float16,fp8,0,2.3665727615356444
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,32,4,128,1,fp8,fp8,0,2.3206527709960936
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,32,8,128,1,float16,float16,0,2.714064025878906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,32,8,128,1,float16,fp8,0,2.5103744506835937
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,32,1,128,1,float16,float16,0,1.1350416183471679
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,32,32,128,1,float16,fp8,0,1.919590377807617
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,32,32,128,1,float16,float16,0,2.0581008911132814
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,32,1,128,1,float16,fp8,0,1.0934144020080567
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,32,8,128,1,fp8,fp8,0,2.525289535522461
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,32,32,128,1,fp8,fp8,0,1.9713104248046875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,32,1,128,1,fp8,fp8,0,1.1351712226867676
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,32,2,128,1,float16,float16,0,1.1698351860046388
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,32,2,128,1,float16,fp8,0,1.1435327529907227
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,32,2,128,1,fp8,fp8,0,1.1067904472351073
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,32,4,128,1,float16,float16,0,1.2231023788452149
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,32,4,128,1,float16,fp8,0,1.1643888473510742
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,32,4,128,1,fp8,fp8,0,1.1719440460205077
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,32,8,128,1,float16,float16,0,1.3292655944824219
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,32,8,128,1,float16,fp8,0,1.2764464378356934
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,32,32,128,1,float16,float16,0,1.0166255950927734
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,32,8,128,1,fp8,fp8,0,1.2649519920349122
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,32,32,128,1,float16,fp8,0,0.9686960220336914
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,32,1,128,1,float16,float16,0,0.5795711994171142
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,32,1,128,1,float16,fp8,0,0.5585968017578125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,32,32,128,1,fp8,fp8,0,1.0473407745361327
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,32,1,128,1,fp8,fp8,0,0.5626848220825196
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,32,2,128,1,float16,float16,0,0.5908912181854248
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,32,2,128,1,float16,fp8,0,0.5768767833709717
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,32,2,128,1,fp8,fp8,0,0.5680304050445557
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,32,4,128,1,float16,float16,0,0.6207968235015869
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,32,4,128,1,float16,fp8,0,0.5968688011169434
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,32,4,128,1,fp8,fp8,0,0.5938943862915039
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,32,32,128,1,float16,float16,0,0.5314000129699707
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,32,8,128,1,float16,float16,0,0.6772031784057617
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,32,8,128,1,float16,fp8,0,0.6505807876586914
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,32,8,128,1,fp8,fp8,0,0.6496416091918945
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,32,32,128,1,float16,fp8,0,0.49901599884033204
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,32,1,128,1,float16,float16,0,0.3001120090484619
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,32,32,128,1,fp8,fp8,0,0.498364782333374
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,32,1,128,1,float16,fp8,0,0.2873647928237915
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,32,1,128,1,fp8,fp8,0,0.29179360866546633
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,32,2,128,1,float16,float16,0,0.3070336103439331
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,32,2,128,1,float16,fp8,0,0.29839200973510743
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,32,2,128,1,fp8,fp8,0,0.3025199890136719
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,32,4,128,1,float16,float16,0,0.32343358993530275
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,32,4,128,1,float16,fp8,0,0.31113440990448
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,32,4,128,1,fp8,fp8,0,0.30995199680328367
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,32,8,128,1,float16,float16,0,0.34950239658355714
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,32,8,128,1,float16,fp8,0,0.3389375925064087
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,32,8,128,1,fp8,fp8,0,0.33729119300842286
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,32,32,128,1,float16,float16,0,0.2816992044448853
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,32,32,128,1,float16,fp8,0,0.2609584093093872
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,32,32,128,1,fp8,fp8,0,0.26048638820648196
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,32,1,128,1,float16,float16,0,0.16187679767608643
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,32,1,128,1,float16,fp8,0,0.15288959741592406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,32,1,128,1,fp8,fp8,0,0.15273760557174682
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,32,2,128,1,float16,float16,0,0.16452159881591796
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,32,4,128,1,fp8,fp8,0,0.16625759601593018
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,32,2,128,1,float16,fp8,0,0.15866880416870116
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,32,2,128,1,fp8,fp8,0,0.15664160251617432
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,32,4,128,1,float16,float16,0,0.1723695993423462
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,32,4,128,1,float16,fp8,0,0.167467200756073
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,32,8,128,1,float16,float16,0,0.1865120053291321
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,32,8,128,1,float16,fp8,0,0.18140319585800171
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,32,8,128,1,fp8,fp8,0,0.18392319679260255
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,32,1,128,1,float16,float16,0,4.433432006835938
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,32,1,128,1,float16,fp8,0,4.215907287597656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,32,1,128,1,fp8,fp8,0,4.207721710205078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,32,2,128,1,float16,fp8,0,4.364092636108398
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,32,2,128,1,fp8,fp8,0,4.357308959960937
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,32,2,128,1,float16,float16,0,4.848795318603516
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,32,4,128,1,float16,fp8,0,4.630804824829101
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,32,4,128,1,float16,float16,0,5.055184173583984
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,32,1,128,1,float16,float16,0,2.26615047454834
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,32,4,128,1,fp8,fp8,0,4.6896625518798825
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,32,32,128,1,float16,float16,0,4.499665451049805
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,32,8,128,1,float16,fp8,0,5.191486358642578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,32,8,128,1,float16,float16,0,5.652529525756836
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,32,32,128,1,float16,fp8,0,4.397825622558594
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,32,8,128,1,fp8,fp8,0,5.161059188842773
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,32,32,128,1,fp8,fp8,0,4.331820678710938
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,32,1,128,1,float16,fp8,0,2.1708383560180664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,32,1,128,1,fp8,fp8,0,2.138091278076172
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,32,2,128,1,float16,float16,0,2.328308868408203
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,32,2,128,1,float16,fp8,0,2.1939680099487306
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,32,2,128,1,fp8,fp8,0,2.2252607345581055
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,32,4,128,1,float16,float16,0,2.47076473236084
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,32,4,128,1,float16,fp8,0,2.3250495910644533
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,32,4,128,1,fp8,fp8,0,2.3342048645019533
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,32,8,128,1,float16,float16,0,2.753539276123047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,32,8,128,1,float16,fp8,0,2.6263824462890626
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,32,1,128,1,float16,float16,0,1.1086688041687012
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,32,1,128,1,float16,fp8,0,1.074289608001709
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,32,32,128,1,float16,float16,0,2.261710357666016
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,32,32,128,1,float16,fp8,0,2.191691207885742
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,32,8,128,1,fp8,fp8,0,2.679892730712891
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,32,1,128,1,fp8,fp8,0,1.1091919898986817
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,32,32,128,1,fp8,fp8,0,2.224473571777344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,32,2,128,1,float16,float16,0,1.1539983749389648
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,32,2,128,1,float16,fp8,0,1.1068256378173829
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,32,2,128,1,fp8,fp8,0,1.110321617126465
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,32,4,128,1,float16,float16,0,1.2229280471801758
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,32,4,128,1,float16,fp8,0,1.1866656303405763
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,32,4,128,1,fp8,fp8,0,1.1763055801391602
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,32,8,128,1,float16,float16,0,1.372225570678711
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,32,8,128,1,float16,fp8,0,1.3216608047485352
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,32,1,128,1,float16,float16,0,0.5706799983978271
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,32,8,128,1,fp8,fp8,0,1.3375951766967773
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,32,32,128,1,float16,float16,0,1.1470255851745605
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,32,1,128,1,float16,fp8,0,0.5485392093658448
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,32,32,128,1,float16,fp8,0,1.1044416427612305
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,32,32,128,1,fp8,fp8,0,1.1026559829711915
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,32,1,128,1,fp8,fp8,0,0.5517615795135498
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,32,2,128,1,float16,float16,0,0.5854047775268555
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,32,2,128,1,float16,fp8,0,0.5716623783111572
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,32,2,128,1,fp8,fp8,0,0.57151198387146
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,32,4,128,1,float16,float16,0,0.6219344139099121
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,32,4,128,1,float16,fp8,0,0.6008143901824952
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,32,4,128,1,fp8,fp8,0,0.6016287803649902
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,32,8,128,1,float16,float16,0,0.6932703971862793
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,32,32,128,1,float16,fp8,0,0.5685632228851318
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,32,8,128,1,float16,fp8,0,0.671940803527832
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,32,8,128,1,fp8,fp8,0,0.6726687908172607
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,32,32,128,1,float16,float16,0,0.5838816165924072
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,32,1,128,1,float16,float16,0,0.2946928024291992
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,32,32,128,1,fp8,fp8,0,0.569217586517334
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,32,1,128,1,float16,fp8,0,0.2890239953994751
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,32,1,128,1,fp8,fp8,0,0.2870431900024414
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,32,2,128,1,float16,float16,0,0.30460638999938966
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,32,2,128,1,float16,fp8,0,0.29712800979614257
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,32,2,128,1,fp8,fp8,0,0.297163200378418
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,32,4,128,1,float16,float16,0,0.3234992027282715
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,32,4,128,1,float16,fp8,0,0.3146735906600952
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,32,4,128,1,fp8,fp8,0,0.3159519910812378
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,32,8,128,1,float16,float16,0,0.358355188369751
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,32,8,128,1,float16,fp8,0,0.3464816093444824
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,32,8,128,1,fp8,fp8,0,0.3493360042572021
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,32,1,128,1,fp8,fp8,0,0.15667519569396973
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,32,32,128,1,float16,float16,0,0.30460801124572756
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,32,32,128,1,float16,fp8,0,0.2967087984085083
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,32,32,128,1,fp8,fp8,0,0.2944655895233154
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,32,1,128,1,float16,float16,0,0.15795680284500122
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,32,1,128,1,float16,fp8,0,0.15678720474243163
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,32,2,128,1,float16,float16,0,0.1649664044380188
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,32,2,128,1,float16,fp8,0,0.16236319541931152
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,32,2,128,1,fp8,fp8,0,0.16106239557266236
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,32,4,128,1,float16,float16,0,0.17331199645996093
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,32,4,128,1,float16,fp8,0,0.16883679628372192
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,32,4,128,1,fp8,fp8,0,0.16876319646835328
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,32,8,128,1,float16,float16,0,0.19146239757537842
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,32,32,128,1,fp8,fp8,0,0.15843520164489747
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,32,8,128,1,float16,fp8,0,0.1872864007949829
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,32,8,128,1,fp8,fp8,0,0.18815040588378906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,32,32,128,1,float16,float16,0,0.16280800104141235
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,32,32,128,1,float16,fp8,0,0.1574944019317627
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,32,1,128,1,float16,float16,0,0.08573439717292786
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,32,1,128,1,float16,fp8,0,0.08220319747924805
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,32,1,128,1,fp8,fp8,0,0.0819920003414154
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,32,2,128,1,float16,float16,0,0.08765119910240174
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,32,2,128,1,float16,fp8,0,0.083924800157547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,32,2,128,1,fp8,fp8,0,0.08395519852638245
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,32,4,128,1,float16,float16,0,0.09535199999809266
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,32,4,128,1,float16,fp8,0,0.08960319757461548
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,32,4,128,1,fp8,fp8,0,0.08798879981040955
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,32,8,128,1,float16,float16,0,0.10661280155181885
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,32,8,128,1,float16,fp8,0,0.09623839855194091
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,32,8,128,1,fp8,fp8,0,0.09784160256385803
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,32,1,128,1,float16,float16,0,2.7480352401733397
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,32,1,128,1,float16,fp8,0,2.6170879364013673
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,32,1,128,1,fp8,fp8,0,2.6151216506958006
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,32,2,128,1,float16,fp8,0,2.727256011962891
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,32,2,128,1,float16,float16,0,2.8888671875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,32,2,128,1,fp8,fp8,0,2.747902488708496
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,32,4,128,1,float16,float16,0,3.1034543991088865
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,32,4,128,1,float16,fp8,0,2.963275146484375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,32,1,128,1,float16,float16,0,1.3474559783935547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,32,4,128,1,fp8,fp8,0,3.0254159927368165
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,32,8,128,1,float16,float16,0,3.475844955444336
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,32,8,128,1,float16,fp8,0,3.343190383911133
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,32,32,128,1,float16,float16,0,3.0991439819335938
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,32,8,128,1,fp8,fp8,0,3.3382030487060548
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,32,32,128,1,float16,fp8,0,2.992078399658203
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,32,32,128,1,fp8,fp8,0,3.0296640396118164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,32,1,128,1,float16,fp8,0,1.373863983154297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,32,1,128,1,fp8,fp8,0,1.3328207969665526
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,32,2,128,1,float16,fp8,0,1.372652816772461
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,32,2,128,1,float16,float16,0,1.4472975730895996
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,32,2,128,1,fp8,fp8,0,1.384721565246582
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,32,4,128,1,float16,float16,0,1.5190959930419923
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,32,4,128,1,float16,fp8,0,1.4822832107543946
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,32,4,128,1,fp8,fp8,0,1.4891023635864258
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,32,8,128,1,float16,float16,0,1.7349920272827148
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,32,8,128,1,float16,fp8,0,1.6996416091918944
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,32,1,128,1,float16,float16,0,0.6882575988769531
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,32,8,128,1,fp8,fp8,0,1.6840591430664062
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,32,32,128,1,float16,float16,0,1.5545392036437988
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,32,1,128,1,float16,fp8,0,0.6794015884399414
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,32,32,128,1,float16,fp8,0,1.5154687881469726
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,32,32,128,1,fp8,fp8,0,1.5215760231018067
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,32,1,128,1,fp8,fp8,0,0.6911888122558594
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,32,2,128,1,float16,float16,0,0.7145167827606201
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,32,2,128,1,float16,fp8,0,0.7007023811340332
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,32,2,128,1,fp8,fp8,0,0.6983920097351074
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,32,4,128,1,float16,float16,0,0.7719327926635742
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,32,8,128,1,float16,float16,0,0.8813759803771972
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,32,4,128,1,float16,fp8,0,0.7561344146728516
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,32,4,128,1,fp8,fp8,0,0.7558879852294922
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,32,8,128,1,float16,fp8,0,0.8599568367004394
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,32,8,128,1,fp8,fp8,0,0.8623968124389648
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,32,32,128,1,float16,float16,0,0.7841328144073486
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,32,1,128,1,float16,float16,0,0.3553040027618408
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,32,32,128,1,float16,fp8,0,0.7675551891326904
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,32,32,128,1,fp8,fp8,0,0.7722239971160889
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,32,1,128,1,float16,fp8,0,0.3502703905105591
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,32,1,128,1,fp8,fp8,0,0.35035200119018556
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,32,2,128,1,float16,float16,0,0.3690176010131836
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,32,2,128,1,float16,fp8,0,0.36587839126586913
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,32,2,128,1,fp8,fp8,0,0.3627039909362793
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,32,4,128,1,float16,float16,0,0.39812641143798827
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,32,4,128,1,float16,fp8,0,0.38676159381866454
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,32,4,128,1,fp8,fp8,0,0.3898303985595703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,32,8,128,1,float16,float16,0,0.4530064105987549
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,32,8,128,1,float16,fp8,0,0.4413919925689697
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,32,8,128,1,fp8,fp8,0,0.4422031879425049
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,32,32,128,1,float16,float16,0,0.40549278259277344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,32,32,128,1,float16,fp8,0,0.39751200675964354
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,32,32,128,1,fp8,fp8,0,0.4007840156555176
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,32,1,128,1,float16,float16,0,0.18700640201568602
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,32,1,128,1,float16,fp8,0,0.18609919548034667
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,32,1,128,1,fp8,fp8,0,0.18770079612731932
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,32,2,128,1,float16,float16,0,0.19530880451202393
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,32,2,128,1,float16,fp8,0,0.19455840587615966
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,32,2,128,1,fp8,fp8,0,0.19218080043792723
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,32,4,128,1,float16,float16,0,0.21046879291534423
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,32,4,128,1,float16,fp8,0,0.20798399448394775
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,32,4,128,1,fp8,fp8,0,0.20617918968200682
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,32,8,128,1,float16,float16,0,0.23871839046478271
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,32,8,128,1,float16,fp8,0,0.23271360397338867
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,32,8,128,1,fp8,fp8,0,0.23140640258789064
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,32,32,128,1,float16,float16,0,0.21912000179290772
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,32,32,128,1,float16,fp8,0,0.21178719997406006
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,32,32,128,1,fp8,fp8,0,0.21168479919433594
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,32,1,128,1,float16,float16,0,0.10659999847412109
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,32,1,128,1,float16,fp8,0,0.10055840015411377
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,32,1,128,1,fp8,fp8,0,0.10054559707641601
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,32,2,128,1,float16,float16,0,0.1078112006187439
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,32,2,128,1,float16,fp8,0,0.10302239656448364
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,32,2,128,1,fp8,fp8,0,0.10421919822692871
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,32,4,128,1,float16,float16,0,0.11509920358657837
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,32,4,128,1,float16,fp8,0,0.11215039491653442
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,32,4,128,1,fp8,fp8,0,0.11260800361633301
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,32,32,128,1,float16,fp8,0,0.11400799751281739
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,32,8,128,1,float16,float16,0,0.12986400127410888
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,32,8,128,1,float16,fp8,0,0.12735199928283691
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,32,8,128,1,fp8,fp8,0,0.12755199670791625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,32,32,128,1,float16,float16,0,0.11914240121841431
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,32,32,128,1,fp8,fp8,0,0.11313439607620239
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,32,1,128,1,float16,float16,0,0.05911679863929749
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,32,1,128,1,float16,fp8,0,0.059222400188446045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,32,1,128,1,fp8,fp8,0,0.05860000252723694
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,32,2,128,1,float16,float16,0,0.06002399921417236
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,32,2,128,1,float16,fp8,0,0.05982720255851746
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,32,2,128,1,fp8,fp8,0,0.059875202178955075
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,32,4,128,1,float16,float16,0,0.0631824016571045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,32,4,128,1,float16,fp8,0,0.06208639740943909
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,32,4,128,1,fp8,fp8,0,0.06200000047683716
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,32,8,128,1,float16,float16,0,0.07213280200958253
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,32,8,128,1,float16,fp8,0,0.06933280229568481
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,32,8,128,1,fp8,fp8,0,0.06839200258255004
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,32,1,128,1,float16,float16,0,2.8226335525512694
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,32,1,128,1,float16,fp8,0,2.7586015701293944
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,32,1,128,1,fp8,fp8,0,2.7509151458740235
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,32,2,128,1,float16,float16,0,2.939031982421875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,32,2,128,1,float16,fp8,0,2.916304016113281
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,32,2,128,1,fp8,fp8,0,2.902499198913574
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,32,4,128,1,float16,float16,0,3.277547073364258
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,32,4,128,1,float16,fp8,0,3.2012142181396483
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,32,4,128,1,fp8,fp8,0,3.237465667724609
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,32,1,128,1,float16,float16,0,1.4032544136047362
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,32,8,128,1,float16,float16,0,3.8341953277587892
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,32,8,128,1,float16,fp8,0,3.7307056427001952
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,32,8,128,1,fp8,fp8,0,3.7689361572265625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,32,32,128,1,float16,float16,0,3.6616161346435545
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,32,1,128,1,float16,fp8,0,1.4019951820373535
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,32,32,128,1,float16,fp8,0,3.618756866455078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,32,32,128,1,fp8,fp8,0,3.636529541015625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,32,1,128,1,fp8,fp8,0,1.4389167785644532
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,32,2,128,1,float16,float16,0,1.496622371673584
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,32,2,128,1,float16,fp8,0,1.4593520164489746
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,32,2,128,1,fp8,fp8,0,1.4730863571166992
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,32,4,128,1,float16,float16,0,1.617487907409668
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,32,4,128,1,float16,fp8,0,1.6141199111938476
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,32,4,128,1,fp8,fp8,0,1.6042720794677734
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,32,8,128,1,float16,float16,0,1.9217279434204102
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,32,8,128,1,float16,fp8,0,1.892969512939453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,32,1,128,1,float16,float16,0,0.7157983779907227
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,32,8,128,1,fp8,fp8,0,1.910603141784668
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,32,1,128,1,float16,fp8,0,0.7112175941467285
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,32,32,128,1,float16,float16,0,1.8485055923461915
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,32,32,128,1,float16,fp8,0,1.8308223724365233
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,32,1,128,1,fp8,fp8,0,0.7256944179534912
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,32,2,128,1,float16,float16,0,0.7519040107727051
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,32,32,128,1,fp8,fp8,0,1.8190383911132812
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,32,2,128,1,float16,fp8,0,0.7480112075805664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,32,2,128,1,fp8,fp8,0,0.7468783855438232
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,32,4,128,1,float16,float16,0,0.8226559638977051
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,32,4,128,1,float16,fp8,0,0.8105744361877442
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,32,4,128,1,fp8,fp8,0,0.8206831932067871
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,32,8,128,1,float16,fp8,0,0.9553279876708984
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,32,8,128,1,float16,float16,0,0.9745488166809082
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,32,8,128,1,fp8,fp8,0,0.9507712364196778
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,32,32,128,1,float16,float16,0,0.9450160026550293
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,32,1,128,1,float16,float16,0,0.36500000953674316
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,32,32,128,1,float16,fp8,0,0.9278816223144531
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,32,1,128,1,float16,fp8,0,0.370415997505188
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,32,32,128,1,fp8,fp8,0,0.9219776153564453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,32,1,128,1,fp8,fp8,0,0.36716480255126954
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,32,2,128,1,float16,float16,0,0.3866080045700073
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,32,2,128,1,float16,fp8,0,0.3832047939300537
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,32,2,128,1,fp8,fp8,0,0.38721280097961425
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,32,4,128,1,float16,float16,0,0.42108001708984377
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,32,4,128,1,float16,fp8,0,0.4207744121551514
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,32,4,128,1,fp8,fp8,0,0.4176464080810547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,32,8,128,1,float16,float16,0,0.49743199348449707
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,32,1,128,1,float16,float16,0,0.19744319915771485
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,32,8,128,1,float16,fp8,0,0.4897119998931885
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,32,8,128,1,fp8,fp8,0,0.4882927894592285
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,32,32,128,1,float16,float16,0,0.4821824073791504
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,32,32,128,1,float16,fp8,0,0.47322402000427244
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,32,32,128,1,fp8,fp8,0,0.4732560157775879
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,32,1,128,1,float16,fp8,0,0.19586399793624878
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,32,1,128,1,fp8,fp8,0,0.19672319889068604
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,32,2,128,1,float16,float16,0,0.20600318908691406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,32,2,128,1,float16,fp8,0,0.20506880283355713
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,32,2,128,1,fp8,fp8,0,0.20508959293365478
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,32,4,128,1,float16,float16,0,0.22279040813446044
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,32,4,128,1,float16,fp8,0,0.22115681171417237
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,32,4,128,1,fp8,fp8,0,0.22065279483795167
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,32,8,128,1,float16,float16,0,0.2600352048873901
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,32,8,128,1,float16,fp8,0,0.25623838901519774
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,32,8,128,1,fp8,fp8,0,0.25702879428863523
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,32,32,128,1,float16,float16,0,0.25477280616760256
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,32,32,128,1,float16,fp8,0,0.2481600046157837
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,32,32,128,1,fp8,fp8,0,0.247324800491333
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,32,1,128,1,float16,float16,0,0.10819679498672485
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,32,1,128,1,float16,fp8,0,0.10787839889526367
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,32,1,128,1,fp8,fp8,0,0.10861439704895019
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,32,2,128,1,float16,float16,0,0.11427520513534546
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,32,2,128,1,float16,fp8,0,0.11492639780044556
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,32,2,128,1,fp8,fp8,0,0.11496800184249878
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,32,4,128,1,float16,float16,0,0.12136000394821167
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,32,4,128,1,float16,fp8,0,0.12256319522857666
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,32,4,128,1,fp8,fp8,0,0.12158399820327759
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,32,8,128,1,float16,float16,0,0.14108799695968627
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,32,8,128,1,float16,fp8,0,0.14033759832382203
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,32,8,128,1,fp8,fp8,0,0.1403839945793152
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,32,32,128,1,float16,float16,0,0.13741279840469361
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,32,32,128,1,float16,fp8,0,0.13390400409698486
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,32,32,128,1,fp8,fp8,0,0.13467040061950683
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,32,1,128,1,float16,float16,0,0.06038399934768677
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,32,1,128,1,float16,fp8,0,0.06050080060958862
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,32,1,128,1,fp8,fp8,0,0.060247999429702756
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,32,2,128,1,float16,float16,0,0.06428160071372986
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,32,2,128,1,float16,fp8,0,0.06143519878387451
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,32,2,128,1,fp8,fp8,0,0.06088640093803406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,32,4,128,1,float16,float16,0,0.06925439834594727
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,32,4,128,1,float16,fp8,0,0.06594719886779785
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,32,4,128,1,fp8,fp8,0,0.06628320217132569
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,32,8,128,1,float16,float16,0,0.08015360236167908
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,32,8,128,1,float16,fp8,0,0.07467039823532104
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,32,8,128,1,fp8,fp8,0,0.0736735999584198
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,32,32,128,1,float16,float16,0,0.07689599990844727
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,32,32,128,1,float16,fp8,0,0.07048640251159669
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,32,32,128,1,fp8,fp8,0,0.06949599981307983
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,32,1,128,1,float16,float16,0,0.03853920102119446
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,32,1,128,1,float16,fp8,0,0.03994880020618439
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,32,1,128,1,fp8,fp8,0,0.040068799257278444
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,32,2,128,1,float16,float16,0,0.039043200016021726
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,32,2,128,1,float16,fp8,0,0.03995679914951324
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,32,2,128,1,fp8,fp8,0,0.03994239866733551
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,32,4,128,1,float16,float16,0,0.040299201011657716
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,32,4,128,1,float16,fp8,0,0.041283199191093446
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,32,4,128,1,fp8,fp8,0,0.041089600324630736
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,32,8,128,1,float16,float16,0,0.044500800967216494
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,32,8,128,1,fp8,fp8,0,0.045684799551963806
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,32,8,128,1,float16,fp8,0,0.045649600028991696
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,32,1,128,1,float16,float16,0,1.9764863967895507
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,32,1,128,1,float16,fp8,0,2.030289649963379
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,32,1,128,1,fp8,fp8,0,2.0297088623046875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,32,2,128,1,float16,float16,0,2.126705551147461
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,32,2,128,1,float16,fp8,0,2.1673519134521486
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,32,2,128,1,fp8,fp8,0,2.1711360931396486
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,32,4,128,1,float16,float16,0,2.435006332397461
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,32,4,128,1,float16,fp8,0,2.4577903747558594
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,32,4,128,1,fp8,fp8,0,2.4704240798950194
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,32,1,128,1,float16,float16,0,1.0034687995910645
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,32,8,128,1,float16,float16,0,3.0150735855102537
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,32,8,128,1,float16,fp8,0,3.012187194824219
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,32,8,128,1,fp8,fp8,0,3.0309375762939452
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,32,32,128,1,float16,float16,0,3.2723392486572265
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,32,1,128,1,float16,fp8,0,1.0339648246765136
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,32,1,128,1,fp8,fp8,0,1.0280960083007813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,32,32,128,1,float16,fp8,0,3.275084686279297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,32,32,128,1,fp8,fp8,0,3.283854293823242
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,32,2,128,1,float16,float16,0,1.0716927528381348
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,32,2,128,1,float16,fp8,0,1.098475170135498
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,32,2,128,1,fp8,fp8,0,1.1064559936523437
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,32,4,128,1,float16,float16,0,1.222334384918213
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,32,4,128,1,float16,fp8,0,1.2457232475280762
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,32,4,128,1,fp8,fp8,0,1.2381664276123048
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,32,8,128,1,float16,float16,0,1.5241727828979492
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,32,8,128,1,float16,fp8,0,1.5252991676330567
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,32,1,128,1,float16,float16,0,0.5156767845153809
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,32,8,128,1,fp8,fp8,0,1.5170528411865234
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,32,1,128,1,float16,fp8,0,0.526523208618164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,32,32,128,1,float16,float16,0,1.6540687561035157
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,32,32,128,1,float16,fp8,0,1.6610448837280274
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,32,32,128,1,fp8,fp8,0,1.6491472244262695
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,32,1,128,1,fp8,fp8,0,0.526910400390625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,32,2,128,1,float16,float16,0,0.5512735843658447
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,32,2,128,1,float16,fp8,0,0.5633359909057617
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,32,2,128,1,fp8,fp8,0,0.560964822769165
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,32,4,128,1,float16,float16,0,0.6268879890441894
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,32,4,128,1,float16,fp8,0,0.6304287910461426
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,32,4,128,1,fp8,fp8,0,0.6352784156799316
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,32,8,128,1,float16,float16,0,0.7721504211425781
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,32,8,128,1,float16,fp8,0,0.7702784061431884
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,32,8,128,1,fp8,fp8,0,0.7723775863647461
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,32,1,128,1,float16,float16,0,0.2689440011978149
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,32,32,128,1,float16,float16,0,0.8448703765869141
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,32,32,128,1,float16,fp8,0,0.8367903709411622
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,32,1,128,1,float16,fp8,0,0.2768991947174072
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,32,32,128,1,fp8,fp8,0,0.8384655952453614
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,32,1,128,1,fp8,fp8,0,0.2745232105255127
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,32,2,128,1,float16,float16,0,0.2870512008666992
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,32,2,128,1,float16,fp8,0,0.293887996673584
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,32,2,128,1,fp8,fp8,0,0.2928112030029297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,32,4,128,1,float16,float16,0,0.3238271951675415
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,32,4,128,1,float16,fp8,0,0.326857590675354
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,32,4,128,1,fp8,fp8,0,0.3282288074493408
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,32,8,128,1,float16,float16,0,0.39916000366210935
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,32,8,128,1,float16,fp8,0,0.39571681022644045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,32,8,128,1,fp8,fp8,0,0.39532639980316164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,32,32,128,1,float16,float16,0,0.4333983898162842
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,32,32,128,1,float16,fp8,0,0.43174080848693847
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,32,1,128,1,float16,float16,0,0.1467679977416992
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,32,32,128,1,fp8,fp8,0,0.4306528091430664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,32,1,128,1,float16,fp8,0,0.14994239807128906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,32,1,128,1,fp8,fp8,0,0.15019999742507933
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,32,2,128,1,float16,float16,0,0.15521600246429443
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,32,2,128,1,float16,fp8,0,0.1583951950073242
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,32,2,128,1,fp8,fp8,0,0.15807679891586304
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,32,4,128,1,float16,float16,0,0.17360960245132445
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,32,4,128,1,float16,fp8,0,0.17583680152893066
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,32,4,128,1,fp8,fp8,0,0.17580640316009521
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,32,8,128,1,float16,float16,0,0.21008000373840333
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,32,8,128,1,float16,fp8,0,0.20953600406646727
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,32,8,128,1,fp8,fp8,0,0.2101360082626343
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,32,32,128,1,float16,float16,0,0.22824480533599853
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,32,32,128,1,float16,fp8,0,0.2262495994567871
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,32,32,128,1,fp8,fp8,0,0.22578079700469972
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,32,1,128,1,float16,float16,0,0.08407199978828431
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,32,1,128,1,float16,fp8,0,0.08490399718284607
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,32,1,128,1,fp8,fp8,0,0.08531360030174255
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,32,2,128,1,float16,float16,0,0.09073920249938965
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,32,2,128,1,float16,fp8,0,0.09119359850883484
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,32,2,128,1,fp8,fp8,0,0.0909280002117157
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,32,4,128,1,float16,float16,0,0.09751200079917907
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,32,4,128,1,float16,fp8,0,0.09857280254364013
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,32,4,128,1,fp8,fp8,0,0.09858400225639344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,32,32,128,1,float16,fp8,0,0.1256783962249756
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,32,32,128,1,fp8,fp8,0,0.12536319494247436
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,32,8,128,1,float16,float16,0,0.11730719804763794
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,32,8,128,1,float16,fp8,0,0.11716959476470948
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,32,8,128,1,fp8,fp8,0,0.11681280136108399
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,32,32,128,1,float16,float16,0,0.12558239698410034
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,32,1,128,1,float16,float16,0,0.04805119931697845
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,32,1,128,1,float16,fp8,0,0.04821760058403015
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,32,1,128,1,fp8,fp8,0,0.048321598768234254
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,32,2,128,1,float16,float16,0,0.04904800057411194
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,32,2,128,1,float16,fp8,0,0.049326398968696596
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,32,2,128,1,fp8,fp8,0,0.04985440075397492
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,32,4,128,1,float16,float16,0,0.056720000505447385
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,32,4,128,1,float16,fp8,0,0.05505759716033935
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,32,4,128,1,fp8,fp8,0,0.05539360046386719
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,32,8,128,1,float16,float16,0,0.0675872027873993
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,32,8,128,1,float16,fp8,0,0.06215360164642334
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,32,8,128,1,fp8,fp8,0,0.06289920210838318
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,32,32,128,1,float16,float16,0,0.0733456015586853
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,32,32,128,1,float16,fp8,0,0.06810879707336426
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,32,32,128,1,fp8,fp8,0,0.06744800209999084
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,32,1,128,1,float16,float16,0,0.03241440057754517
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,32,1,128,1,float16,fp8,0,0.03438239991664886
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,32,1,128,1,fp8,fp8,0,0.034179198741912845
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,32,2,128,1,float16,float16,0,0.032708799839019774
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,32,2,128,1,float16,fp8,0,0.03446879982948303
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,32,2,128,1,fp8,fp8,0,0.03444640040397644
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,32,4,128,1,float16,float16,0,0.0338128000497818
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,32,4,128,1,float16,fp8,0,0.035787200927734374
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,32,4,128,1,fp8,fp8,0,0.03583999872207642
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,32,8,128,1,float16,float16,0,0.03855679929256439
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,32,8,128,1,float16,fp8,0,0.03984160125255585
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,32,8,128,1,fp8,fp8,0,0.0395823985338211
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,32,32,128,1,float16,float16,0,0.04066559970378876
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,32,32,128,1,float16,fp8,0,0.04211359918117523
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,32,32,128,1,fp8,fp8,0,0.04217920005321503
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,32,1,128,1,float16,float16,0,0.027846398949623107
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,32,1,128,1,float16,fp8,0,0.02948960065841675
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,32,1,128,1,fp8,fp8,0,0.02900800108909607
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,32,2,128,1,float16,float16,0,0.027564799785614012
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,32,2,128,1,float16,fp8,0,0.02871040105819702
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,32,2,128,1,fp8,fp8,0,0.028731200098991393
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,32,4,128,1,float16,float16,0,0.02791520059108734
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,32,4,128,1,float16,fp8,0,0.02911199927330017
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,32,4,128,1,fp8,fp8,0,0.029308798909187316
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,32,8,128,1,float16,float16,0,0.028969600796699524
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,32,8,128,1,float16,fp8,0,0.030041599273681642
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,32,8,128,1,fp8,fp8,0,0.029783999919891356
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,32,1,128,1,float16,float16,0,0.8026032447814941
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,32,1,128,1,float16,fp8,0,0.8444560050964356
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,32,2,128,1,float16,float16,0,0.8800512313842773
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,32,1,128,1,fp8,fp8,0,0.8437904357910156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,32,2,128,1,float16,fp8,0,0.9142815589904785
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,32,2,128,1,fp8,fp8,0,0.9144975662231445
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,32,4,128,1,float16,float16,0,1.0283216476440429
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,32,4,128,1,float16,fp8,0,1.0543456077575684
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,32,4,128,1,fp8,fp8,0,1.0570863723754882
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,32,8,128,1,float16,float16,0,1.3221072196960448
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,32,8,128,1,float16,fp8,0,1.3395952224731444
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,32,8,128,1,fp8,fp8,0,1.333743953704834
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,32,1,128,1,float16,float16,0,0.41309919357299807
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,32,1,128,1,float16,fp8,0,0.43453278541564944
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,32,1,128,1,fp8,fp8,0,0.4353151798248291
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,32,32,128,1,float16,float16,0,1.5659872055053712
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,32,2,128,1,float16,float16,0,0.45267682075500487
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,32,32,128,1,float16,fp8,0,1.5389328002929688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,32,32,128,1,fp8,fp8,0,1.5365856170654297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,32,2,128,1,float16,fp8,0,0.46900157928466796
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,32,2,128,1,fp8,fp8,0,0.4691904067993164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,32,4,128,1,float16,float16,0,0.524022388458252
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,32,4,128,1,float16,fp8,0,0.5388448238372803
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,32,4,128,1,fp8,fp8,0,0.5406943798065186
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,32,8,128,1,float16,float16,0,0.6751391887664795
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,32,8,128,1,float16,fp8,0,0.6796800136566162
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,32,8,128,1,fp8,fp8,0,0.6789887905120849
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,32,32,128,1,float16,float16,0,0.7966944217681885
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,32,32,128,1,float16,fp8,0,0.7802847862243653
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,32,1,128,1,float16,float16,0,0.22019360065460206
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,32,32,128,1,fp8,fp8,0,0.7801392078399658
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,32,1,128,1,float16,fp8,0,0.2311840057373047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,32,1,128,1,fp8,fp8,0,0.23034238815307617
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,32,2,128,1,float16,float16,0,0.24011518955230712
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,32,2,128,1,float16,fp8,0,0.24779200553894043
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,32,2,128,1,fp8,fp8,0,0.24756960868835448
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,32,4,128,1,float16,float16,0,0.2760400056838989
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,32,4,128,1,float16,fp8,0,0.282039999961853
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,32,4,128,1,fp8,fp8,0,0.2833424091339111
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,32,8,128,1,float16,float16,0,0.3491375923156738
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,32,8,128,1,float16,fp8,0,0.3525887966156006
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,32,8,128,1,fp8,fp8,0,0.3503936052322388
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,32,32,128,1,float16,float16,0,0.40959677696228025
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,32,32,128,1,float16,fp8,0,0.40262398719787595
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,32,1,128,1,float16,float16,0,0.12369920015335083
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,32,32,128,1,fp8,fp8,0,0.4023295879364014
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,32,1,128,1,float16,fp8,0,0.12840800285339354
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,32,1,128,1,fp8,fp8,0,0.12787840366363526
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,32,2,128,1,float16,float16,0,0.13076800107955933
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,32,2,128,1,float16,fp8,0,0.13527200222015381
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,32,2,128,1,fp8,fp8,0,0.13619359731674194
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,32,4,128,1,float16,float16,0,0.1511952042579651
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,32,4,128,1,float16,fp8,0,0.15302879810333253
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,32,4,128,1,fp8,fp8,0,0.1537008047103882
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,32,8,128,1,float16,float16,0,0.1865407943725586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,32,8,128,1,float16,fp8,0,0.18708319664001466
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,32,8,128,1,fp8,fp8,0,0.1874768018722534
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,32,32,128,1,float16,float16,0,0.2178272008895874
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,32,32,128,1,float16,fp8,0,0.21236159801483154
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,32,32,128,1,fp8,fp8,0,0.2117471933364868
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,32,1,128,1,float16,float16,0,0.07167360186576843
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,32,1,128,1,float16,fp8,0,0.07395840287208558
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,32,1,128,1,fp8,fp8,0,0.07439360022544861
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,32,2,128,1,float16,float16,0,0.07711840271949769
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,32,2,128,1,float16,fp8,0,0.08063679933547974
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,32,2,128,1,fp8,fp8,0,0.07996799945831298
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,32,4,128,1,float16,float16,0,0.08553599715232849
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,32,4,128,1,float16,fp8,0,0.0876688003540039
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,32,4,128,1,fp8,fp8,0,0.08768960237503051
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,32,8,128,1,float16,float16,0,0.1050447940826416
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,32,8,128,1,float16,fp8,0,0.10515999794006348
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,32,8,128,1,fp8,fp8,0,0.10488959550857543
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,32,32,128,1,float16,float16,0,0.1200943946838379
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,32,32,128,1,float16,fp8,0,0.11742080450057983
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,32,32,128,1,fp8,fp8,0,0.11750240325927734
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,32,1,128,1,float16,float16,0,0.04380959868431091
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,32,1,128,1,float16,fp8,0,0.04381439983844757
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,32,1,128,1,fp8,fp8,0,0.04353919923305512
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,32,2,128,1,float16,float16,0,0.045075199007987975
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,32,2,128,1,float16,fp8,0,0.04419040083885193
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,32,2,128,1,fp8,fp8,0,0.044654399156570435
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,32,4,128,1,float16,float16,0,0.05143359899520874
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,32,4,128,1,float16,fp8,0,0.04913919866085052
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,32,4,128,1,fp8,fp8,0,0.050337600708007815
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,32,8,128,1,float16,float16,0,0.06266400218009949
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,32,8,128,1,float16,fp8,0,0.05925120115280151
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,32,8,128,1,fp8,fp8,0,0.05773119926452637
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,32,32,128,1,float16,float16,0,0.06909440159797668
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,32,32,128,1,float16,fp8,0,0.062326401472091675
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,32,32,128,1,fp8,fp8,0,0.06177440285682678
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,32,1,128,1,float16,float16,0,0.030480000376701354
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,32,1,128,1,float16,fp8,0,0.03236320018768311
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,32,1,128,1,fp8,fp8,0,0.032513600587844846
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,32,2,128,1,float16,float16,0,0.031124800443649292
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,32,2,128,1,float16,fp8,0,0.03227199912071228
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,32,2,128,1,fp8,fp8,0,0.03219839930534363
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,32,4,128,1,float16,float16,0,0.032158398628234865
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,32,4,128,1,float16,fp8,0,0.03371039927005768
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,32,32,128,1,float16,fp8,0,0.03801440000534058
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,32,4,128,1,fp8,fp8,0,0.033456000685691836
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,32,8,128,1,float16,float16,0,0.0365776002407074
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,32,8,128,1,float16,fp8,0,0.03806720077991486
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,32,8,128,1,fp8,fp8,0,0.03805600106716156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,32,32,128,1,float16,float16,0,0.038043200969696045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,32,32,128,1,fp8,fp8,0,0.03854719996452331
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,32,1,128,1,float16,float16,0,0.024835200607776643
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,32,1,128,1,float16,fp8,0,0.025177600979804992
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,32,1,128,1,fp8,fp8,0,0.025569599866867066
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,32,2,128,1,float16,float16,0,0.024691200256347655
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,32,2,128,1,float16,fp8,0,0.025382399559020996
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,32,2,128,1,fp8,fp8,0,0.025833600759506227
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,32,4,128,1,float16,float16,0,0.024916799366474153
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,32,4,128,1,float16,fp8,0,0.0257968008518219
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,32,4,128,1,fp8,fp8,0,0.02569440007209778
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,32,8,128,1,float16,float16,0,0.02570880055427551
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,32,8,128,1,float16,fp8,0,0.026679998636245726
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,32,8,128,1,fp8,fp8,0,0.02651360034942627
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,32,32,128,1,float16,float16,0,0.02903839945793152
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,32,32,128,1,float16,fp8,0,0.029967999458312987
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,32,32,128,1,fp8,fp8,0,0.02995840013027191
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,32,1,128,1,float16,float16,0,0.023019200563430785
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,32,1,128,1,float16,fp8,0,0.023867200314998626
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,32,1,128,1,fp8,fp8,0,0.023636800050735474
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,32,2,128,1,float16,float16,0,0.02351839989423752
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,32,2,128,1,float16,fp8,0,0.02396959960460663
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,32,2,128,1,fp8,fp8,0,0.024065600335597993
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,32,4,128,1,float16,float16,0,0.023318399488925935
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,32,4,128,1,float16,fp8,0,0.024164800345897675
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,32,4,128,1,fp8,fp8,0,0.024128000438213348
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,32,8,128,1,float16,float16,0,0.02340800017118454
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,32,8,128,1,float16,fp8,0,0.024292799830436706
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,32,8,128,1,fp8,fp8,0,0.024886399507522583
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,32,1,128,1,float16,float16,0,0.38816959857940675
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,32,1,128,1,float16,fp8,0,0.415880012512207
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,32,1,128,1,fp8,fp8,0,0.41918721199035647
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,32,2,128,1,float16,float16,0,0.42522878646850587
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,32,2,128,1,float16,fp8,0,0.45211038589477537
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,32,2,128,1,fp8,fp8,0,0.4526480197906494
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,32,4,128,1,float16,float16,0,0.49912638664245607
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,32,4,128,1,float16,fp8,0,0.5247392177581787
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,32,4,128,1,fp8,fp8,0,0.5261360168457031
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,32,8,128,1,float16,float16,0,0.6452000141143799
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,32,8,128,1,float16,fp8,0,0.6729951858520508
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,32,8,128,1,fp8,fp8,0,0.6710480213165283
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,32,32,128,1,float16,float16,0,0.777342414855957
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,32,1,128,1,fp8,fp8,0,0.21986238956451415
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,32,1,128,1,float16,float16,0,0.20593440532684326
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,32,32,128,1,float16,fp8,0,0.7781919956207275
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,32,1,128,1,float16,fp8,0,0.22139201164245606
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,32,32,128,1,fp8,fp8,0,0.7758416175842285
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,32,2,128,1,float16,float16,0,0.22279040813446044
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,32,2,128,1,fp8,fp8,0,0.23774878978729247
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,32,2,128,1,float16,fp8,0,0.2381455898284912
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,32,8,128,1,float16,float16,0,0.3340303897857666
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,32,4,128,1,float16,float16,0,0.2603280067443848
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,32,4,128,1,float16,fp8,0,0.275550389289856
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,32,4,128,1,fp8,fp8,0,0.2747551918029785
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,32,8,128,1,float16,fp8,0,0.3439728021621704
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,32,8,128,1,fp8,fp8,0,0.3454384088516235
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,32,32,128,1,float16,float16,0,0.4009568214416504
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,32,32,128,1,float16,fp8,0,0.39926559925079347
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,32,1,128,1,float16,float16,0,0.11414079666137696
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,32,32,128,1,fp8,fp8,0,0.400819206237793
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,32,1,128,1,float16,fp8,0,0.12417919635772705
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,32,1,128,1,fp8,fp8,0,0.12394720315933228
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,32,2,128,1,float16,float16,0,0.1229375958442688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,32,2,128,1,float16,fp8,0,0.13154720067977904
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,32,2,128,1,fp8,fp8,0,0.13180480003356934
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,32,4,128,1,float16,float16,0,0.14080480337142945
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,32,4,128,1,float16,fp8,0,0.14947359561920165
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,32,4,128,1,fp8,fp8,0,0.15069279670715333
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,32,8,128,1,float16,float16,0,0.17690720558166503
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,32,8,128,1,float16,fp8,0,0.18540799617767334
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,32,8,128,1,fp8,fp8,0,0.18447040319442748
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,32,32,128,1,float16,float16,0,0.2132863998413086
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,32,32,128,1,float16,fp8,0,0.209168004989624
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,32,32,128,1,fp8,fp8,0,0.20913760662078856
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,32,1,128,1,float16,float16,0,0.06911680102348328
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,32,1,128,1,float16,fp8,0,0.07095999717712402
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,32,1,128,1,fp8,fp8,0,0.07121599912643432
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,32,2,128,1,float16,float16,0,0.07493280172348023
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,32,2,128,1,float16,fp8,0,0.07710880041122437
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,32,2,128,1,fp8,fp8,0,0.07668160200119019
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,32,8,128,1,fp8,fp8,0,0.1024176001548767
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,32,4,128,1,float16,float16,0,0.08300960063934326
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,32,32,128,1,float16,fp8,0,0.11245759725570678
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,32,4,128,1,float16,fp8,0,0.08446400165557862
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,32,4,128,1,fp8,fp8,0,0.08489120006561279
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,32,8,128,1,float16,float16,0,0.10147999525070191
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,32,8,128,1,float16,fp8,0,0.10246560573577881
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,32,32,128,1,float16,float16,0,0.1174847960472107
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,32,32,128,1,fp8,fp8,0,0.11276639699935913
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,32,1,128,1,float16,float16,0,0.03835839927196503
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,32,1,128,1,float16,fp8,0,0.03948479890823364
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,32,1,128,1,fp8,fp8,0,0.03956480026245117
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,32,2,128,1,float16,float16,0,0.04174720048904419
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,32,2,128,1,float16,fp8,0,0.0407584011554718
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,32,2,128,1,fp8,fp8,0,0.04072639942169189
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,32,4,128,1,float16,float16,0,0.04756479859352112
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,32,4,128,1,float16,fp8,0,0.04579679965972901
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,32,4,128,1,fp8,fp8,0,0.04555999934673309
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,32,8,128,1,float16,float16,0,0.05930240154266357
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,32,8,128,1,float16,fp8,0,0.053251200914382936
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,32,8,128,1,fp8,fp8,0,0.05521759986877441
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,32,32,128,1,float16,float16,0,0.0663424015045166
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,32,32,128,1,float16,fp8,0,0.058164799213409425
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,32,32,128,1,fp8,fp8,0,0.057574397325515746
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,32,1,128,1,float16,float16,0,0.027635198831558228
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,32,1,128,1,float16,fp8,0,0.029070401191711427
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,32,1,128,1,fp8,fp8,0,0.02890239953994751
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,32,2,128,1,float16,float16,0,0.02808000147342682
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,32,2,128,1,float16,fp8,0,0.029311999678611755
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,32,2,128,1,fp8,fp8,0,0.029337599873542786
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,32,4,128,1,float16,float16,0,0.02905600070953369
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,32,4,128,1,float16,fp8,0,0.030273601412773132
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,32,4,128,1,fp8,fp8,0,0.030163198709487915
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,32,8,128,1,float16,float16,0,0.03415519893169403
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,32,8,128,1,float16,fp8,0,0.03442400097846985
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,32,8,128,1,fp8,fp8,0,0.03442080020904541
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,32,32,128,1,float16,float16,0,0.03689759969711304
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,32,32,128,1,float16,fp8,0,0.03651039898395538
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,32,32,128,1,fp8,fp8,0,0.03644480109214783
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,32,1,128,1,float16,float16,0,0.023078399896621703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,32,1,128,1,float16,fp8,0,0.023736000061035156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,32,1,128,1,fp8,fp8,0,0.02393600046634674
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,32,2,128,1,float16,float16,0,0.022899200022220612
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,32,2,128,1,float16,fp8,0,0.02373439967632294
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,32,2,128,1,fp8,fp8,0,0.02380640059709549
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,32,4,128,1,float16,float16,0,0.02316479980945587
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,32,4,128,1,float16,fp8,0,0.024033600091934205
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,32,4,128,1,fp8,fp8,0,0.023895999789237975
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,32,8,128,1,float16,float16,0,0.024454399943351746
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,32,8,128,1,float16,fp8,0,0.024723200500011443
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,32,8,128,1,fp8,fp8,0,0.02492000013589859
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,32,32,128,1,float16,float16,0,0.0272816002368927
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,32,32,128,1,float16,fp8,0,0.02789280116558075
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,32,32,128,1,fp8,fp8,0,0.028067201375961304
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,32,1,128,1,float16,float16,0,0.02147199958562851
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,32,1,128,1,float16,fp8,0,0.02221599966287613
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,32,1,128,1,fp8,fp8,0,0.022188800573349
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,32,2,128,1,float16,float16,0,0.021495999395847322
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,32,2,128,1,float16,fp8,0,0.02234559953212738
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,32,2,128,1,fp8,fp8,0,0.022308799624443054
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,32,4,128,1,float16,float16,0,0.02173600047826767
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,32,4,128,1,float16,fp8,0,0.022291199862957002
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,32,4,128,1,fp8,fp8,0,0.022283199429512023
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,32,8,128,1,float16,float16,0,0.021902400255203246
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,32,8,128,1,float16,fp8,0,0.022536000609397887
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,32,8,128,1,fp8,fp8,0,0.02279839962720871
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,32,32,128,1,float16,float16,0,0.022040000557899474
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,32,32,128,1,float16,fp8,0,0.022750400006771088
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,32,32,128,1,fp8,fp8,0,0.022457599639892578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,32,1,128,1,float16,float16,0,0.02018879950046539
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,32,1,128,1,float16,fp8,0,0.021198399364948273
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,32,1,128,1,fp8,fp8,0,0.021033599972724915
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,32,2,128,1,float16,float16,0,0.020577600598335265
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,32,2,128,1,float16,fp8,0,0.021404799818992615
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,32,2,128,1,fp8,fp8,0,0.021249599754810333
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,32,4,128,1,float16,float16,0,0.02082560062408447
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,32,4,128,1,float16,fp8,0,0.02131199985742569
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,32,4,128,1,fp8,fp8,0,0.021792000532150267
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,32,8,128,1,float16,float16,0,0.020708799362182617
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,32,8,128,1,float16,fp8,0,0.021222400665283202
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,32,8,128,1,fp8,fp8,0,0.021558399498462676
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,32,1,128,1,float16,float16,0,0.2051487922668457
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,32,1,128,1,float16,fp8,0,0.21967039108276368
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,32,1,128,1,fp8,fp8,0,0.21904480457305908
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,32,2,128,1,float16,float16,0,0.22395520210266112
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,32,2,128,1,float16,fp8,0,0.23811519145965576
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,32,2,128,1,fp8,fp8,0,0.23787200450897217
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,32,4,128,1,float16,float16,0,0.25979840755462646
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,32,4,128,1,float16,fp8,0,0.2729520082473755
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,32,4,128,1,fp8,fp8,0,0.2746543884277344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,32,8,128,1,float16,float16,0,0.3326143980026245
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,32,8,128,1,float16,fp8,0,0.3441231966018677
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,32,8,128,1,fp8,fp8,0,0.34813120365142824
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,32,32,128,1,float16,float16,0,0.45846238136291506
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,32,32,128,1,float16,fp8,0,0.4689727783203125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,32,1,128,1,float16,float16,0,0.11345119476318359
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,32,32,128,1,fp8,fp8,0,0.469649600982666
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,32,1,128,1,float16,fp8,0,0.12390400171279907
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,32,4,128,1,float16,fp8,0,0.14926079511642457
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,32,1,128,1,fp8,fp8,0,0.12382880449295045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,32,2,128,1,float16,float16,0,0.12177439928054809
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,32,2,128,1,float16,fp8,0,0.1317728042602539
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,32,2,128,1,fp8,fp8,0,0.13115839958190917
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,32,4,128,1,float16,float16,0,0.14050400257110596
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,32,4,128,1,fp8,fp8,0,0.15007519721984863
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,32,8,128,1,float16,float16,0,0.17811360359191894
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,32,8,128,1,float16,fp8,0,0.18466720581054688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,32,8,128,1,fp8,fp8,0,0.1852671980857849
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,32,32,128,1,float16,float16,0,0.24287040233612062
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,32,32,128,1,float16,fp8,0,0.24498400688171387
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,32,32,128,1,fp8,fp8,0,0.24508318901062012
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,32,1,128,1,float16,float16,0,0.0688368022441864
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,32,1,128,1,float16,fp8,0,0.07148320078849793
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,32,1,128,1,fp8,fp8,0,0.071424001455307
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,32,2,128,1,float16,float16,0,0.07497280240058898
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,32,2,128,1,float16,fp8,0,0.0777728021144867
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,32,2,128,1,fp8,fp8,0,0.07740960121154786
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,32,4,128,1,float16,float16,0,0.08373600244522095
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,32,4,128,1,float16,fp8,0,0.08532000184059144
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,32,32,128,1,float16,fp8,0,0.13112479448318481
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,32,4,128,1,fp8,fp8,0,0.08468480110168457
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,32,8,128,1,float16,float16,0,0.10178719758987427
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,32,8,128,1,float16,fp8,0,0.10334399938583375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,32,8,128,1,fp8,fp8,0,0.10318559408187866
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,32,32,128,1,float16,float16,0,0.13287680149078368
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,32,32,128,1,fp8,fp8,0,0.13137439489364625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,32,1,128,1,float16,float16,0,0.03847520053386688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,32,1,128,1,float16,fp8,0,0.0400191992521286
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,32,1,128,1,fp8,fp8,0,0.039812800288200376
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,32,2,128,1,float16,float16,0,0.039577600359916684
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,32,2,128,1,float16,fp8,0,0.040759998559951785
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,32,2,128,1,fp8,fp8,0,0.04106079936027527
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,32,4,128,1,float16,float16,0,0.048028799891471866
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,32,4,128,1,float16,fp8,0,0.045788800716400145
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,32,4,128,1,fp8,fp8,0,0.04551360011100769
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,32,8,128,1,float16,float16,0,0.05965120196342468
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,32,8,128,1,float16,fp8,0,0.05365279912948608
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,32,8,128,1,fp8,fp8,0,0.05353760123252869
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,32,32,128,1,float16,float16,0,0.07418400049209595
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,32,32,128,1,float16,fp8,0,0.0664031982421875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,32,32,128,1,fp8,fp8,0,0.06588799953460693
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,32,1,128,1,float16,float16,0,0.02738400101661682
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,32,1,128,1,float16,fp8,0,0.028935998678207397
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,32,1,128,1,fp8,fp8,0,0.02881760001182556
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,32,2,128,1,float16,float16,0,0.02774080038070679
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,32,2,128,1,float16,fp8,0,0.029080000519752503
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,32,2,128,1,fp8,fp8,0,0.02908959984779358
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,32,4,128,1,float16,float16,0,0.028806400299072266
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,32,4,128,1,float16,fp8,0,0.030031999945640563
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,32,4,128,1,fp8,fp8,0,0.0303056001663208
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,32,8,128,1,float16,float16,0,0.03359200060367584
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,32,8,128,1,float16,fp8,0,0.03442400097846985
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,32,8,128,1,fp8,fp8,0,0.03441759943962097
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,32,32,128,1,float16,float16,0,0.039508798718452455
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,32,32,128,1,float16,fp8,0,0.04084320068359375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,32,32,128,1,fp8,fp8,0,0.04081439971923828
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,32,1,128,1,float16,float16,0,0.02322559952735901
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,32,1,128,1,float16,fp8,0,0.02385759949684143
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,32,1,128,1,fp8,fp8,0,0.023657600581645965
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,32,2,128,1,float16,float16,0,0.023219199478626253
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,32,2,128,1,float16,fp8,0,0.023996800184249878
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,32,2,128,1,fp8,fp8,0,0.023940800130367278
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,32,4,128,1,float16,float16,0,0.02340960055589676
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,32,4,128,1,float16,fp8,0,0.024054400622844696
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,32,4,128,1,fp8,fp8,0,0.024246400594711302
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,32,8,128,1,float16,float16,0,0.02388319969177246
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,32,8,128,1,float16,fp8,0,0.025070399045944214
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,32,8,128,1,fp8,fp8,0,0.02499680072069168
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,32,32,128,1,float16,float16,0,0.027136000990867614
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,32,32,128,1,float16,fp8,0,0.027884799242019653
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,32,32,128,1,fp8,fp8,0,0.027902400493621825
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,32,1,128,1,float16,float16,0,0.02139520049095154
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,32,1,128,1,float16,fp8,0,0.021935999393463135
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,32,1,128,1,fp8,fp8,0,0.022206400334835053
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,32,2,128,1,float16,float16,0,0.021547199785709382
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,32,2,128,1,float16,fp8,0,0.022732800245285033
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,32,2,128,1,fp8,fp8,0,0.022598400712013245
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,32,4,128,1,float16,float16,0,0.02182080000638962
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,32,4,128,1,float16,fp8,0,0.022486400604248048
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,32,4,128,1,fp8,fp8,0,0.02218240052461624
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,32,8,128,1,float16,float16,0,0.02191520035266876
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,32,8,128,1,float16,fp8,0,0.02248319983482361
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,32,8,128,1,fp8,fp8,0,0.02258400022983551
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,32,32,128,1,float16,float16,0,0.02178560048341751
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,32,32,128,1,float16,fp8,0,0.022462399303913118
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,32,32,128,1,fp8,fp8,0,0.022419199347496033
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,32,1,128,1,float16,float16,0,0.020452800393104553
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,32,1,128,1,float16,fp8,0,0.020972800254821778
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,32,1,128,1,fp8,fp8,0,0.02123039960861206
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,32,2,128,1,float16,float16,0,0.020318399369716644
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,32,2,128,1,float16,fp8,0,0.021404799818992615
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,32,2,128,1,fp8,fp8,0,0.02129279971122742
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,32,4,128,1,float16,float16,0,0.020550400018692017
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,32,4,128,1,float16,fp8,0,0.02168480008840561
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,32,4,128,1,fp8,fp8,0,0.021459199488162994
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,32,8,128,1,float16,float16,0,0.020897600054740905
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,32,8,128,1,float16,fp8,0,0.021542400121688843
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,32,8,128,1,fp8,fp8,0,0.02154559940099716
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,32,32,128,1,float16,float16,0,0.020561599731445314
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,32,32,128,1,float16,fp8,0,0.02173919975757599
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,32,32,128,1,fp8,fp8,0,0.02106879949569702
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,32,1,128,1,float16,float16,0,0.019875200092792512
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,32,1,128,1,float16,fp8,0,0.020556800067424774
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,32,1,128,1,fp8,fp8,0,0.020641599595546723
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,32,2,128,1,float16,float16,0,0.019950400292873382
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,32,2,128,1,float16,fp8,0,0.02061759978532791
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,32,2,128,1,fp8,fp8,0,0.02046239972114563
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,32,4,128,1,float16,float16,0,0.020214399695396422
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,32,4,128,1,float16,fp8,0,0.020585599541664123
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,32,4,128,1,fp8,fp8,0,0.02067359983921051
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,32,8,128,1,float16,float16,0,0.020193600654602052
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,32,8,128,1,float16,fp8,0,0.020924800634384157
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,32,8,128,1,fp8,fp8,0,0.02090719938278198
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,32,1,128,1,float16,float16,0,0.11472959518432617
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,32,1,128,1,float16,fp8,0,0.12465280294418335
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,32,1,128,1,fp8,fp8,0,0.1254639983177185
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,32,2,128,1,float16,float16,0,0.12267359495162963
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,32,2,128,1,float16,fp8,0,0.13288639783859252
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,32,2,128,1,fp8,fp8,0,0.13265600204467773
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,32,4,128,1,float16,float16,0,0.141211199760437
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,32,4,128,1,float16,fp8,0,0.15049439668655396
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,32,4,128,1,fp8,fp8,0,0.15124959945678712
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,32,8,128,1,float16,float16,0,0.17683039903640746
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,32,8,128,1,float16,fp8,0,0.1857103943824768
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,32,8,128,1,fp8,fp8,0,0.18538719415664673
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,32,32,128,1,float16,float16,0,0.302294397354126
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,32,32,128,1,float16,fp8,0,0.31750240325927737
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,32,32,128,1,fp8,fp8,0,0.31780960559844973
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,32,1,128,1,float16,float16,0,0.06893439888954163
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,32,1,128,1,float16,fp8,0,0.0725600004196167
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,32,1,128,1,fp8,fp8,0,0.07243040204048157
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,32,2,128,1,float16,float16,0,0.07531200051307678
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,32,2,128,1,float16,fp8,0,0.07822880148887634
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,32,2,128,1,fp8,fp8,0,0.0782975971698761
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,32,4,128,1,float16,float16,0,0.08376479744911194
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,32,4,128,1,float16,fp8,0,0.08631680011749268
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,32,4,128,1,fp8,fp8,0,0.08578879833221435
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,32,8,128,1,float16,float16,0,0.10187679529190063
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,32,8,128,1,float16,fp8,0,0.10375679731369018
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,32,8,128,1,fp8,fp8,0,0.10403039455413818
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,32,32,128,1,float16,float16,0,0.16234879493713378
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,32,32,128,1,float16,fp8,0,0.16915680170059205
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,32,32,128,1,fp8,fp8,0,0.16881760358810424
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,32,1,128,1,float16,float16,0,0.03931840062141419
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,32,1,128,1,float16,fp8,0,0.04015200138092041
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,32,1,128,1,fp8,fp8,0,0.04026240110397339
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,32,2,128,1,float16,float16,0,0.04065439999103546
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,32,2,128,1,float16,fp8,0,0.0412880003452301
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,32,2,128,1,fp8,fp8,0,0.0411296010017395
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,32,4,128,1,float16,float16,0,0.04690400063991547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,32,4,128,1,float16,fp8,0,0.046489599347114566
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,32,4,128,1,fp8,fp8,0,0.04650560021400452
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,32,8,128,1,float16,float16,0,0.06027519702911377
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,32,8,128,1,float16,fp8,0,0.054067200422286986
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,32,8,128,1,fp8,fp8,0,0.05450559854507446
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,32,32,128,1,float16,float16,0,0.08994399905204772
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,32,32,128,1,float16,fp8,0,0.08536319732666016
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,32,32,128,1,fp8,fp8,0,0.08515679836273193
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,32,1,128,1,float16,float16,0,0.027772799134254456
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,32,1,128,1,float16,fp8,0,0.029209598898887634
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,32,1,128,1,fp8,fp8,0,0.029212799668312073
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,32,2,128,1,float16,float16,0,0.02799200117588043
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,32,2,128,1,float16,fp8,0,0.029319998621940613
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,32,2,128,1,fp8,fp8,0,0.029631999135017396
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,32,4,128,1,float16,float16,0,0.0290367990732193
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,32,4,128,1,float16,fp8,0,0.030329599976539612
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,32,4,128,1,fp8,fp8,0,0.030403199791908263
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,32,8,128,1,float16,float16,0,0.033748799562454225
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,32,8,128,1,float16,fp8,0,0.03478240072727203
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,32,8,128,1,fp8,fp8,0,0.03468160033226013
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,32,32,128,1,float16,float16,0,0.04749920070171356
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,32,32,128,1,float16,fp8,0,0.050040000677108766
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,32,32,128,1,fp8,fp8,0,0.049369600415229795
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,32,1,128,1,float16,float16,0,0.023119999468326567
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,32,1,128,1,float16,fp8,0,0.023596799373626708
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,32,1,128,1,fp8,fp8,0,0.02404640018939972
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,32,2,128,1,float16,float16,0,0.02330400049686432
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,32,2,128,1,float16,fp8,0,0.023950399458408357
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,32,2,128,1,fp8,fp8,0,0.023904000222682954
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,32,4,128,1,float16,float16,0,0.023580799996852874
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,32,4,128,1,float16,fp8,0,0.023950399458408357
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,32,4,128,1,fp8,fp8,0,0.02428639978170395
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,32,8,128,1,float16,float16,0,0.024191999435424806
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,32,8,128,1,float16,fp8,0,0.02534720003604889
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,32,8,128,1,fp8,fp8,0,0.025028800964355467
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,32,32,128,1,float16,float16,0,0.03081600069999695
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,32,32,128,1,float16,fp8,0,0.03238719999790192
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,32,32,128,1,fp8,fp8,0,0.03255040049552917
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,32,1,128,1,float16,float16,0,0.021385599672794343
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,32,1,128,1,float16,fp8,0,0.02216159999370575
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,32,1,128,1,fp8,fp8,0,0.022176000475883483
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,32,2,128,1,float16,float16,0,0.02170239984989166
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,32,2,128,1,float16,fp8,0,0.022448000311851502
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,32,2,128,1,fp8,fp8,0,0.0225055992603302
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,32,4,128,1,float16,float16,0,0.021563200652599333
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,32,4,128,1,float16,fp8,0,0.022460800409317017
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,32,4,128,1,fp8,fp8,0,0.022401599586009978
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,32,8,128,1,float16,float16,0,0.021942399442195892
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,32,8,128,1,float16,fp8,0,0.022804799675941467
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,32,8,128,1,fp8,fp8,0,0.022627200186252593
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,32,32,128,1,float16,float16,0,0.021817600727081297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,32,32,128,1,float16,fp8,0,0.0227743998169899
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,32,32,128,1,fp8,fp8,0,0.022707200050354003
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,32,1,128,1,float16,float16,0,0.02038560062646866
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,32,1,128,1,float16,fp8,0,0.02126079946756363
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,32,1,128,1,fp8,fp8,0,0.02112800031900406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,32,2,128,1,float16,float16,0,0.020721599459648132
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,32,2,128,1,float16,fp8,0,0.021129600703716278
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,32,2,128,1,fp8,fp8,0,0.02112800031900406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,32,4,128,1,float16,float16,0,0.02072640061378479
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,32,4,128,1,float16,fp8,0,0.021296000480651854
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,32,4,128,1,fp8,fp8,0,0.021731199324131013
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,32,8,128,1,float16,float16,0,0.020654399693012238
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,32,8,128,1,float16,fp8,0,0.02152000069618225
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,32,8,128,1,fp8,fp8,0,0.021649600565433504
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,32,32,128,1,float16,float16,0,0.020467199385166168
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,32,32,128,1,float16,fp8,0,0.021484799683094025
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,32,32,128,1,fp8,fp8,0,0.02158239930868149
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,32,1,128,1,float16,float16,0,0.01966720074415207
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,32,1,128,1,float16,fp8,0,0.02073120027780533
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,32,1,128,1,fp8,fp8,0,0.020654399693012238
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,32,2,128,1,float16,float16,0,0.019980800151824952
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,32,2,128,1,float16,fp8,0,0.02090719938278198
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,32,2,128,1,fp8,fp8,0,0.020500800013542174
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,32,4,128,1,float16,float16,0,0.019923199713230134
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,32,4,128,1,float16,fp8,0,0.021240000426769257
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,32,4,128,1,fp8,fp8,0,0.02048639953136444
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,32,8,128,1,float16,float16,0,0.019969600439071655
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,32,8,128,1,float16,fp8,0,0.020971199870109557
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,32,8,128,1,fp8,fp8,0,0.021081599593162536
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,32,32,128,1,float16,float16,0,0.02030719965696335
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,32,32,128,1,float16,fp8,0,0.020980800688266753
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,32,32,128,1,fp8,fp8,0,0.02093600034713745
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,32,1,128,1,float16,float16,0,0.019620800018310548
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,32,1,128,1,float16,fp8,0,0.020304000377655028
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,32,1,128,1,fp8,fp8,0,0.020454399287700653
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,32,2,128,1,float16,float16,0,0.019603200256824493
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,32,2,128,1,float16,fp8,0,0.020185600221157073
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,32,2,128,1,fp8,fp8,0,0.020588800311088562
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,32,4,128,1,float16,float16,0,0.019662399590015412
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,32,1,128,1,float16,float16,0,0.07085760235786438
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,32,4,128,1,float16,fp8,0,0.02024639993906021
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,32,4,128,1,fp8,fp8,0,0.020521600544452668
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,32,8,128,1,float16,float16,0,0.019734400510787963
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,32,8,128,1,float16,fp8,0,0.02080159932374954
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,32,8,128,1,fp8,fp8,0,0.020340800285339355
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,32,1,128,1,float16,fp8,0,0.0735264003276825
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,32,1,128,1,fp8,fp8,0,0.07350559830665589
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,32,4,128,1,float16,fp8,0,0.08699039816856384
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,32,2,128,1,float16,float16,0,0.07705119848251343
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,32,2,128,1,float16,fp8,0,0.07945280075073242
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,32,2,128,1,fp8,fp8,0,0.07937440276145935
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,32,4,128,1,float16,float16,0,0.08451039791107177
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,32,4,128,1,fp8,fp8,0,0.08716319799423218
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,32,8,128,1,float16,float16,0,0.1336143970489502
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,32,8,128,1,float16,fp8,0,0.14032959938049316
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,32,8,128,1,fp8,fp8,0,0.1412287950515747
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,32,32,128,1,float16,float16,0,0.22298879623413087
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,32,32,128,1,float16,fp8,0,0.24415040016174316
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,32,32,128,1,fp8,fp8,0,0.24563360214233398
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,32,1,128,1,float16,float16,0,0.039427199959754945
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,32,1,128,1,float16,fp8,0,0.04156480133533478
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,32,1,128,1,fp8,fp8,0,0.04128639996051788
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,32,2,128,1,float16,float16,0,0.04142079949378967
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,32,2,128,1,float16,fp8,0,0.04252319931983948
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,32,2,128,1,fp8,fp8,0,0.042422398924827576
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,32,8,128,1,float16,fp8,0,0.07250080108642579
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,32,4,128,1,float16,float16,0,0.04859040081501007
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,32,4,128,1,float16,fp8,0,0.047547200322151185
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,32,4,128,1,fp8,fp8,0,0.04763999879360199
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,32,8,128,1,float16,float16,0,0.0752336025238037
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,32,1,128,1,fp8,fp8,0,0.029819199442863466
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,32,8,128,1,fp8,fp8,0,0.07306079864501953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,32,32,128,1,float16,float16,0,0.12020800113677979
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,32,32,128,1,float16,fp8,0,0.12222559452056884
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,32,32,128,1,fp8,fp8,0,0.12260479927062988
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,32,1,128,1,float16,float16,0,0.02815360128879547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,32,1,128,1,float16,fp8,0,0.029899200797080992
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,32,2,128,1,float16,float16,0,0.028488001227378844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,32,2,128,1,float16,fp8,0,0.029982399940490723
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,32,8,128,1,float16,fp8,0,0.04396960139274597
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,32,2,128,1,fp8,fp8,0,0.030193600058555602
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,32,4,128,1,float16,float16,0,0.02934719920158386
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,32,4,128,1,float16,fp8,0,0.030849599838256837
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,32,4,128,1,fp8,fp8,0,0.031137600541114807
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,32,8,128,1,float16,float16,0,0.040838399529457094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,32,8,128,1,fp8,fp8,0,0.04391840100288391
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,32,32,128,1,float16,float16,0,0.06318560242652893
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,32,32,128,1,float16,fp8,0,0.06854559779167176
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,32,32,128,1,fp8,fp8,0,0.06848639845848084
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,32,1,128,1,float16,float16,0,0.02383359968662262
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,32,1,128,1,float16,fp8,0,0.024398399889469145
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,32,1,128,1,fp8,fp8,0,0.024486400187015533
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,32,2,128,1,float16,float16,0,0.02372319996356964
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,32,2,128,1,float16,fp8,0,0.024038399755954742
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,32,2,128,1,fp8,fp8,0,0.02452320009469986
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,32,4,128,1,float16,float16,0,0.023689599335193635
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,32,4,128,1,float16,fp8,0,0.024424000084400176
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,32,4,128,1,fp8,fp8,0,0.024692800641059876
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,32,8,128,1,float16,float16,0,0.028115200996398925
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,32,8,128,1,float16,fp8,0,0.029844799637794496
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,32,8,128,1,fp8,fp8,0,0.02959679961204529
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,32,32,128,1,float16,float16,0,0.03819519877433777
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,32,32,128,1,float16,fp8,0,0.04138399958610535
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,32,32,128,1,fp8,fp8,0,0.04149279892444611
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,32,1,128,1,float16,float16,0,0.021512000262737273
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,32,4,128,1,float16,float16,0,0.02146880030632019
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,32,1,128,1,float16,fp8,0,0.02234079986810684
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,32,1,128,1,fp8,fp8,0,0.022280000150203705
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,32,2,128,1,float16,float16,0,0.02146880030632019
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,32,2,128,1,float16,fp8,0,0.022299200296401978
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,32,2,128,1,fp8,fp8,0,0.022284799814224245
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,32,4,128,1,float16,fp8,0,0.02232639938592911
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,32,4,128,1,fp8,fp8,0,0.022511999309062957
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,32,8,128,1,float16,float16,0,0.021646399796009064
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,32,8,128,1,float16,fp8,0,0.02245279997587204
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,32,8,128,1,fp8,fp8,0,0.022439999878406523
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,32,32,128,1,float16,float16,0,0.02576960027217865
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,32,32,128,1,float16,fp8,0,0.027134400606155396
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,32,32,128,1,fp8,fp8,0,0.027143999934196472
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,32,1,128,1,float16,float16,0,0.02046239972114563
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,32,1,128,1,float16,fp8,0,0.021724799275398256
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,32,1,128,1,fp8,fp8,0,0.021267199516296388
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,32,2,128,1,float16,float16,0,0.020446400344371795
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,32,2,128,1,float16,fp8,0,0.021078400313854218
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,32,2,128,1,fp8,fp8,0,0.021465599536895752
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,32,4,128,1,float16,float16,0,0.020945599675178526
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,32,32,128,1,float16,float16,0,0.020640000700950623
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,32,32,128,1,float16,fp8,0,0.021588799357414246
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,32,4,128,1,float16,fp8,0,0.021768000721931458
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,32,4,128,1,fp8,fp8,0,0.02176479995250702
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,32,8,128,1,float16,float16,0,0.02083200067281723
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,32,8,128,1,float16,fp8,0,0.02147199958562851
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,32,8,128,1,fp8,fp8,0,0.021547199785709382
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,32,32,128,1,fp8,fp8,0,0.021512000262737273
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,32,1,128,1,float16,float16,0,0.01976799964904785
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,32,1,128,1,float16,fp8,0,0.020395199954509734
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,32,1,128,1,fp8,fp8,0,0.020712000131607056
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,32,2,128,1,float16,float16,0,0.020257599651813507
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,32,2,128,1,float16,fp8,0,0.02104319930076599
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,32,2,128,1,fp8,fp8,0,0.02056480050086975
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,32,4,128,1,float16,float16,0,0.019937600195407867
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,32,4,128,1,float16,fp8,0,0.02099040001630783
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,32,4,128,1,fp8,fp8,0,0.02075680047273636
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,32,8,128,1,float16,float16,0,0.02011840045452118
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,32,1,128,1,float16,float16,0,0.019566400349140166
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,32,8,128,1,float16,fp8,0,0.020870399475097657
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,32,8,128,1,fp8,fp8,0,0.021115200221538545
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,32,32,128,1,float16,float16,0,0.020454399287700653
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,32,32,128,1,float16,fp8,0,0.021367999911308288
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,32,32,128,1,fp8,fp8,0,0.02117920070886612
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,32,1,128,1,float16,fp8,0,0.020436799526214598
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,32,1,128,1,fp8,fp8,0,0.02024960070848465
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,32,2,128,1,float16,float16,0,0.01958560049533844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,32,2,128,1,float16,fp8,0,0.020390400290489198
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,32,2,128,1,fp8,fp8,0,0.02027360051870346
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,32,4,128,1,float16,float16,0,0.019785599410533906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,32,4,128,1,float16,fp8,0,0.02065120041370392
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,32,4,128,1,fp8,fp8,0,0.020524799823760986
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,32,8,128,1,float16,float16,0,0.019550399482250215
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,32,8,128,1,float16,fp8,0,0.020334400236606598
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,32,8,128,1,fp8,fp8,0,0.02070080041885376
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,32,32,128,1,float16,float16,0,0.020022399723529816
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,32,32,128,1,float16,fp8,0,0.02133760005235672
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,32,32,128,1,fp8,fp8,0,0.021217599511146545
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,32,1,128,1,float16,float16,0,0.019156800210475923
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,32,1,128,1,float16,fp8,0,0.020284800231456755
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,32,1,128,1,fp8,fp8,0,0.020028799772262573
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,32,2,128,1,float16,float16,0,0.019288000464439393
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,32,2,128,1,float16,fp8,0,0.02032800018787384
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,32,2,128,1,fp8,fp8,0,0.020132799446582795
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,32,4,128,1,float16,float16,0,0.019543999433517457
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,32,4,128,1,float16,fp8,0,0.020393599569797517
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,32,4,128,1,fp8,fp8,0,0.020371200144290925
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,32,8,128,1,float16,float16,0,0.019574399292469024
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,32,8,128,1,float16,fp8,0,0.020110400021076204
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,32,8,128,1,fp8,fp8,0,0.020075200498104094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,32,1,128,1,float16,float16,0,0.02978079915046692
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,32,1,128,1,float16,fp8,0,0.031384000182151796
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,32,1,128,1,fp8,fp8,0,0.031188800930976868
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,32,2,128,1,float16,float16,0,0.03699679970741272
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,32,8,128,1,float16,float16,0,0.08038560152053834
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,32,2,128,1,float16,fp8,0,0.04005120098590851
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,32,2,128,1,fp8,fp8,0,0.040212801098823546
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,32,4,128,1,float16,float16,0,0.0519711971282959
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,32,4,128,1,float16,fp8,0,0.05794399976730347
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,32,4,128,1,fp8,fp8,0,0.05805919766426086
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,32,8,128,1,float16,fp8,0,0.09388319849967956
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,32,8,128,1,fp8,fp8,0,0.09416319727897644
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,32,32,128,1,float16,float16,0,0.13628000020980835
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,32,32,128,1,float16,fp8,0,0.1651792049407959
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,32,32,128,1,fp8,fp8,0,0.16490399837493896
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,32,1,128,1,float16,float16,0,0.02294880002737045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,32,1,128,1,float16,fp8,0,0.024025599658489227
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,32,1,128,1,fp8,fp8,0,0.02406879961490631
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,32,2,128,1,float16,float16,0,0.026638400554656983
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,32,2,128,1,float16,fp8,0,0.028300800919532777
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,32,2,128,1,fp8,fp8,0,0.028220799565315247
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,32,4,128,1,float16,float16,0,0.03439520001411438
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,32,4,128,1,float16,fp8,0,0.037601599097251893
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,32,4,128,1,fp8,fp8,0,0.03749440014362335
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,32,8,128,1,float16,float16,0,0.04886879920959473
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,32,8,128,1,float16,fp8,0,0.055364799499511716
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,32,8,128,1,fp8,fp8,0,0.05507680177688599
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,32,1,128,1,fp8,fp8,0,0.022006399929523468
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,32,32,128,1,float16,float16,0,0.07807360291481018
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,32,32,128,1,float16,fp8,0,0.09061279892921448
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,32,32,128,1,fp8,fp8,0,0.09131360054016113
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,32,1,128,1,float16,float16,0,0.02112800031900406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,32,1,128,1,float16,fp8,0,0.021907199919223786
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,32,2,128,1,float16,float16,0,0.021668800711631776
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,32,2,128,1,float16,fp8,0,0.02235199958086014
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,32,2,128,1,fp8,fp8,0,0.022305600345134735
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,32,4,128,1,float16,float16,0,0.02531839907169342
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,32,4,128,1,float16,fp8,0,0.026494398713111877
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,32,4,128,1,fp8,fp8,0,0.026895999908447266
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,32,8,128,1,float16,float16,0,0.03301759958267212
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,32,8,128,1,float16,fp8,0,0.03588480055332184
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,32,8,128,1,fp8,fp8,0,0.035913598537445066
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,32,32,128,1,float16,float16,0,0.04721119999885559
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,32,32,128,1,float16,fp8,0,0.05346400141716003
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,32,32,128,1,fp8,fp8,0,0.053934401273727416
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,32,1,128,1,float16,float16,0,0.019889600574970245
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,32,1,128,1,float16,fp8,0,0.020735999941825865
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,32,1,128,1,fp8,fp8,0,0.021033599972724915
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,32,2,128,1,float16,float16,0,0.020059199631214143
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,32,2,128,1,float16,fp8,0,0.021331200003623964
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,32,2,128,1,fp8,fp8,0,0.021022400259971617
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,32,4,128,1,float16,float16,0,0.02059040069580078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,32,4,128,1,float16,fp8,0,0.021639999747276307
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,32,4,128,1,fp8,fp8,0,0.021425600349903106
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,32,8,128,1,float16,float16,0,0.024294400215148927
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,32,8,128,1,float16,fp8,0,0.026131200790405273
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,32,8,128,1,fp8,fp8,0,0.02585279941558838
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,32,32,128,1,float16,float16,0,0.0319599986076355
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,32,32,128,1,float16,fp8,0,0.034918400645256045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,32,32,128,1,fp8,fp8,0,0.034944000840187076
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,32,1,128,1,float16,float16,0,0.019468800723552705
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,32,1,128,1,float16,fp8,0,0.020164799690246583
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,32,1,128,1,fp8,fp8,0,0.02017119973897934
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,32,2,128,1,float16,float16,0,0.01967200040817261
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,32,2,128,1,float16,fp8,0,0.020172800123691558
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,32,2,128,1,fp8,fp8,0,0.02048799991607666
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,32,4,128,1,float16,float16,0,0.01961279958486557
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,32,4,128,1,float16,fp8,0,0.020559999346733093
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,32,4,128,1,fp8,fp8,0,0.02067999988794327
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,32,8,128,1,float16,float16,0,0.020102399587631225
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,32,8,128,1,float16,fp8,0,0.02107200026512146
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,32,8,128,1,fp8,fp8,0,0.020787200331687926
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,32,32,128,1,float16,float16,0,0.024233600497245787
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,32,32,128,1,float16,fp8,0,0.025488001108169556
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,32,32,128,1,fp8,fp8,0,0.025536000728607178
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,32,1,128,1,float16,float16,0,0.019249600172042847
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,32,1,128,1,float16,fp8,0,0.020052799582481386
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,32,1,128,1,fp8,fp8,0,0.020151999592781068
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,32,2,128,1,float16,float16,0,0.019303999841213226
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,32,2,128,1,float16,fp8,0,0.020336000621318816
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,32,2,128,1,fp8,fp8,0,0.02025440037250519
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,32,4,128,1,float16,float16,0,0.019543999433517457
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,32,4,128,1,float16,fp8,0,0.020131200551986694
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,32,4,128,1,fp8,fp8,0,0.020284800231456755
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,32,8,128,1,float16,float16,0,0.01987839937210083
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,32,8,128,1,float16,fp8,0,0.02051360011100769
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,32,8,128,1,fp8,fp8,0,0.020904000103473663
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,32,32,128,1,float16,float16,0,0.019823999702930452
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,32,32,128,1,float16,fp8,0,0.020846399664878845
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,32,32,128,1,fp8,fp8,0,0.020945599675178526
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,32,2,128,1,float16,fp8,0,0.019972799718379973
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,32,1,128,1,float16,float16,0,0.01881919950246811
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,32,1,128,1,float16,fp8,0,0.019852800667285918
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,32,1,128,1,fp8,fp8,0,0.01976799964904785
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,32,2,128,1,float16,float16,0,0.01905120015144348
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,32,2,128,1,fp8,fp8,0,0.019849599897861482
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,32,4,128,1,float16,float16,0,0.01919199973344803
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,32,4,128,1,float16,fp8,0,0.019948799908161164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,32,4,128,1,fp8,fp8,0,0.02008160054683685
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,32,8,128,1,float16,float16,0,0.019252799451351166
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,32,8,128,1,float16,fp8,0,0.02044160068035126
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,32,8,128,1,fp8,fp8,0,0.02009119987487793
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,32,32,128,1,float16,float16,0,0.019760000705718993
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,32,32,128,1,float16,fp8,0,0.02052319943904877
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,32,32,128,1,fp8,fp8,0,0.0203232005238533
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,32,1,128,1,float16,float16,0,0.017209599912166595
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,32,1,128,1,float16,fp8,0,0.017791999876499175
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,32,1,128,1,fp8,fp8,0,0.017716799676418305
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,32,2,128,1,float16,float16,0,0.01921280026435852
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,32,2,128,1,float16,fp8,0,0.019678400456905366
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,32,2,128,1,fp8,fp8,0,0.019835199415683746
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,32,4,128,1,float16,float16,0,0.019113600254058838
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,32,4,128,1,float16,fp8,0,0.01966399997472763
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,32,4,128,1,fp8,fp8,0,0.019726400077342988
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,32,8,128,1,float16,float16,0,0.01886080056428909
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,32,8,128,1,float16,fp8,0,0.019995200634002685
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,32,8,128,1,fp8,fp8,0,0.0203247994184494
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,32,32,128,1,float16,float16,0,0.019054399430751802
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,32,32,128,1,float16,fp8,0,0.020175999402999877
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,32,32,128,1,fp8,fp8,0,0.02003999948501587
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,32,1,128,1,float16,float16,0,0.01640959978103638
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,32,1,128,1,float16,fp8,0,0.017078399658203125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,32,1,128,1,fp8,fp8,0,0.017131200432777403
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,32,2,128,1,float16,float16,0,0.016859200596809388
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,32,2,128,1,float16,fp8,0,0.01794400066137314
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,32,2,128,1,fp8,fp8,0,0.017748799920082093
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,32,4,128,1,float16,float16,0,0.01907680034637451
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,32,4,128,1,float16,fp8,0,0.019334399700164796
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,32,4,128,1,fp8,fp8,0,0.019249600172042847
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,32,8,128,1,float16,float16,0,0.018796800076961516
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,32,8,128,1,float16,fp8,0,0.019433599710464478
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,32,8,128,1,fp8,fp8,0,0.019526399672031403
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,24,1,128,1,float16,fp8,0,19.192860412597657
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,24,1,128,1,fp8,fp8,0,19.151875305175782
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,24,2,128,1,float16,fp8,0,19.642103576660155
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,24,2,128,1,fp8,fp8,0,19.42615203857422
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,24,4,128,1,float16,fp8,0,19.48723449707031
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,24,1,128,1,float16,float16,0,22.772547912597656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,24,2,128,1,float16,float16,0,22.9285400390625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,24,4,128,1,float16,float16,0,23.428923034667967
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,24,24,128,1,float16,float16,0,12.71836166381836
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,24,24,128,1,float16,fp8,0,11.227129364013672
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,24,24,128,1,fp8,fp8,0,11.148748779296875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,24,1,128,1,float16,float16,0,11.811116790771484
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,24,4,128,1,fp8,fp8,0,20.18115692138672
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,24,8,128,1,fp8,fp8,0,20.7246826171875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,24,8,128,1,float16,fp8,0,21.04236602783203
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,24,1,128,1,fp8,fp8,0,9.681817626953125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,24,8,128,1,float16,float16,0,25.255047607421876
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,24,1,128,1,float16,fp8,0,10.23570556640625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,24,2,128,1,float16,float16,0,11.586974334716796
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,24,2,128,1,float16,fp8,0,9.769003295898438
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,24,2,128,1,fp8,fp8,0,9.708914947509765
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,24,4,128,1,float16,fp8,0,9.974143981933594
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,24,4,128,1,float16,float16,0,11.806558227539062
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,24,24,128,1,float16,float16,0,6.616203308105469
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,24,4,128,1,fp8,fp8,0,9.836341094970702
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,24,8,128,1,float16,fp8,0,10.00888442993164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,24,24,128,1,float16,fp8,0,5.579556655883789
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,24,8,128,1,float16,float16,0,12.054393768310547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,24,8,128,1,fp8,fp8,0,9.978376007080078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,24,24,128,1,fp8,fp8,0,5.631556701660156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,24,1,128,1,float16,fp8,0,4.827875137329102
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,24,1,128,1,fp8,fp8,0,4.802470397949219
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,24,1,128,1,float16,float16,0,5.777990341186523
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,24,2,128,1,float16,fp8,0,4.824382400512695
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,24,2,128,1,float16,float16,0,5.681273651123047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,24,2,128,1,fp8,fp8,0,4.8876289367675785
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,24,4,128,1,float16,float16,0,5.750907135009766
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,24,4,128,1,float16,fp8,0,4.918289566040039
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,24,4,128,1,fp8,fp8,0,4.892025756835937
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,24,24,128,1,float16,float16,0,3.159292793273926
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,24,8,128,1,float16,float16,0,5.913123321533203
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,24,8,128,1,float16,fp8,0,5.010915374755859
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,24,24,128,1,float16,fp8,0,2.9227392196655275
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,24,8,128,1,fp8,fp8,0,5.032185745239258
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,24,1,128,1,float16,float16,0,2.553780746459961
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,24,24,128,1,fp8,fp8,0,2.806257629394531
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,24,1,128,1,float16,fp8,0,2.4399135589599608
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,24,1,128,1,fp8,fp8,0,2.4742624282836916
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,24,2,128,1,float16,float16,0,2.6203472137451174
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,24,2,128,1,float16,fp8,0,2.4037616729736326
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,24,2,128,1,fp8,fp8,0,2.402115249633789
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,24,4,128,1,float16,fp8,0,2.4443231582641602
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,24,4,128,1,fp8,fp8,0,2.4577327728271485
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,24,4,128,1,float16,float16,0,2.9725391387939455
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,24,8,128,1,float16,float16,0,2.8587167739868162
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,24,8,128,1,float16,fp8,0,2.5241775512695312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,24,8,128,1,fp8,fp8,0,2.6091472625732424
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,24,1,128,1,float16,fp8,0,10.839518737792968
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,24,1,128,1,fp8,fp8,0,11.17816619873047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,24,1,128,1,float16,float16,0,12.932814025878907
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,24,2,128,1,float16,fp8,0,11.189545440673829
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,24,2,128,1,fp8,fp8,0,11.179739379882813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,24,4,128,1,float16,fp8,0,11.227877044677735
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,24,2,128,1,float16,float16,0,13.369268798828125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,24,4,128,1,float16,float16,0,13.399189758300782
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,24,24,128,1,float16,fp8,0,6.6615135192871096
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,24,24,128,1,float16,float16,0,7.756755065917969
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,24,24,128,1,fp8,fp8,0,6.757891082763672
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,24,1,128,1,float16,float16,0,6.5222930908203125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,24,4,128,1,fp8,fp8,0,11.22616958618164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,24,8,128,1,float16,fp8,0,11.55957260131836
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,24,8,128,1,fp8,fp8,0,11.606715393066406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,24,8,128,1,float16,float16,0,13.84222869873047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,24,1,128,1,float16,fp8,0,5.504619216918945
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,24,1,128,1,fp8,fp8,0,5.509006500244141
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,24,2,128,1,float16,fp8,0,5.564388656616211
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,24,2,128,1,fp8,fp8,0,5.477487945556641
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,24,2,128,1,float16,float16,0,6.669321441650391
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,24,4,128,1,float16,fp8,0,5.610612869262695
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,24,4,128,1,float16,float16,0,6.699038696289063
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,24,4,128,1,fp8,fp8,0,5.800352096557617
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,24,24,128,1,float16,float16,0,3.7717838287353516
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,24,24,128,1,float16,fp8,0,3.4244014739990236
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,24,8,128,1,float16,fp8,0,5.910811233520508
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,24,8,128,1,float16,float16,0,7.082603454589844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,24,8,128,1,fp8,fp8,0,5.9124000549316404
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,24,24,128,1,fp8,fp8,0,3.3587711334228514
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,24,1,128,1,float16,float16,0,3.3909168243408203
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,24,1,128,1,float16,fp8,0,2.8248239517211915
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,24,1,128,1,fp8,fp8,0,3.040603256225586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,24,2,128,1,float16,float16,0,3.2035873413085936
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,24,2,128,1,float16,fp8,0,2.7761215209960937
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,24,2,128,1,fp8,fp8,0,2.8403215408325195
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,24,4,128,1,float16,fp8,0,2.8102544784545898
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,24,4,128,1,float16,float16,0,3.2340606689453124
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,24,4,128,1,fp8,fp8,0,2.90347843170166
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,24,24,128,1,float16,float16,0,1.8793487548828125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,24,8,128,1,float16,fp8,0,2.9187583923339844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,24,8,128,1,float16,float16,0,3.5016719818115236
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,24,24,128,1,float16,fp8,0,1.726363182067871
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,24,8,128,1,fp8,fp8,0,2.9827808380126952
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,24,24,128,1,fp8,fp8,0,1.7056240081787108
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,24,1,128,1,float16,float16,0,1.5175871849060059
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,24,1,128,1,float16,fp8,0,1.3713520050048829
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,24,1,128,1,fp8,fp8,0,1.4450160026550294
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,24,2,128,1,float16,float16,0,1.4526608467102051
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,24,2,128,1,float16,fp8,0,1.5156944274902344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,24,2,128,1,fp8,fp8,0,1.38646240234375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,24,4,128,1,float16,float16,0,1.5281663894653321
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,24,4,128,1,float16,fp8,0,1.4239359855651856
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,24,4,128,1,fp8,fp8,0,1.5307168006896972
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,24,8,128,1,float16,float16,0,1.5822079658508301
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,24,8,128,1,float16,fp8,0,1.4677488327026367
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,24,8,128,1,fp8,fp8,0,1.5823599815368652
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,24,1,128,1,float16,fp8,0,7.694481658935547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,24,1,128,1,fp8,fp8,0,7.739153289794922
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,24,2,128,1,float16,fp8,0,7.7939613342285154
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,24,1,128,1,float16,float16,0,9.202193450927734
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,24,2,128,1,fp8,fp8,0,7.718852996826172
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,24,4,128,1,float16,fp8,0,7.899136352539062
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,24,2,128,1,float16,float16,0,9.221086120605468
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,24,4,128,1,float16,float16,0,9.471826934814453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,24,24,128,1,float16,fp8,0,4.871553421020508
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,24,24,128,1,float16,float16,0,5.472643280029297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,24,24,128,1,fp8,fp8,0,4.9126640319824215
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,24,1,128,1,float16,float16,0,4.540879821777343
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,24,4,128,1,fp8,fp8,0,7.925193786621094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,24,8,128,1,float16,fp8,0,8.317578887939453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,24,8,128,1,fp8,fp8,0,8.417269134521485
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,24,8,128,1,float16,float16,0,9.784342193603516
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,24,1,128,1,float16,fp8,0,3.8247344970703123
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,24,1,128,1,fp8,fp8,0,3.8355422973632813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,24,2,128,1,float16,fp8,0,3.944793701171875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,24,2,128,1,float16,float16,0,4.463800048828125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,24,2,128,1,fp8,fp8,0,3.868425750732422
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,24,4,128,1,float16,fp8,0,4.008232116699219
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,24,4,128,1,float16,float16,0,4.654449462890625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,24,24,128,1,float16,float16,0,2.7216320037841797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,24,4,128,1,fp8,fp8,0,3.964081573486328
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,24,8,128,1,float16,float16,0,4.7444816589355465
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,24,8,128,1,float16,fp8,0,4.1574241638183596
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,24,8,128,1,fp8,fp8,0,4.299126434326172
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,24,24,128,1,float16,fp8,0,2.4735055923461915
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,24,24,128,1,fp8,fp8,0,2.5305871963500977
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,24,1,128,1,float16,fp8,0,1.9215728759765625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,24,1,128,1,float16,float16,0,2.169424057006836
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,24,1,128,1,fp8,fp8,0,2.001571273803711
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,24,2,128,1,float16,float16,0,2.161819267272949
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,24,2,128,1,float16,fp8,0,1.9497264862060546
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,24,2,128,1,fp8,fp8,0,1.948289680480957
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,24,4,128,1,float16,float16,0,2.2109888076782225
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,24,4,128,1,float16,fp8,0,1.9931600570678711
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,24,4,128,1,fp8,fp8,0,1.9977855682373047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,24,8,128,1,float16,float16,0,2.2944591522216795
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,24,24,128,1,float16,float16,0,1.3596223831176757
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,24,24,128,1,float16,fp8,0,1.2555808067321776
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,24,8,128,1,float16,fp8,0,2.075846481323242
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,24,8,128,1,fp8,fp8,0,2.191417694091797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,24,1,128,1,float16,float16,0,1.0143247604370118
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,24,24,128,1,fp8,fp8,0,1.3615535736083983
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,24,1,128,1,float16,fp8,0,0.9780464172363281
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,24,1,128,1,fp8,fp8,0,1.0553839683532715
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,24,2,128,1,float16,float16,0,1.046388816833496
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,24,2,128,1,float16,fp8,0,0.9892191886901855
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,24,2,128,1,fp8,fp8,0,0.9829263687133789
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,24,4,128,1,float16,float16,0,1.0770928382873535
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,24,4,128,1,float16,fp8,0,1.0676752090454102
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,24,8,128,1,float16,float16,0,1.1259407997131348
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,24,4,128,1,fp8,fp8,0,1.0235520362854005
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,24,8,128,1,float16,fp8,0,1.093337631225586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,24,8,128,1,fp8,fp8,0,1.0604960441589355
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,24,1,128,1,float16,fp8,0,9.910926055908202
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,24,1,128,1,fp8,fp8,0,10.031454467773438
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,24,2,128,1,float16,fp8,0,10.201148986816406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,24,2,128,1,fp8,fp8,0,10.18424301147461
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,24,1,128,1,float16,float16,0,11.902432250976563
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,24,4,128,1,float16,fp8,0,10.401898956298828
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,24,2,128,1,float16,float16,0,11.951713562011719
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,24,4,128,1,float16,float16,0,12.210806274414063
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,24,24,128,1,float16,fp8,0,6.628887939453125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,24,24,128,1,float16,float16,0,7.651163482666016
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,24,1,128,1,float16,float16,0,5.884020614624023
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,24,24,128,1,fp8,fp8,0,6.664516448974609
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,24,4,128,1,fp8,fp8,0,10.363760375976563
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,24,8,128,1,float16,fp8,0,11.04005126953125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,24,8,128,1,fp8,fp8,0,11.003266906738281
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,24,8,128,1,float16,float16,0,13.050761413574218
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,24,1,128,1,float16,fp8,0,4.972808074951172
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,24,1,128,1,fp8,fp8,0,5.119126510620117
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,24,2,128,1,float16,fp8,0,5.053876876831055
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,24,2,128,1,float16,float16,0,5.923899078369141
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,24,2,128,1,fp8,fp8,0,5.0653327941894535
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,24,4,128,1,float16,fp8,0,5.223164749145508
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,24,4,128,1,float16,float16,0,6.207292938232422
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,24,4,128,1,fp8,fp8,0,5.262654495239258
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,24,24,128,1,float16,fp8,0,3.3331230163574217
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,24,24,128,1,float16,float16,0,3.6537025451660154
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,24,8,128,1,float16,float16,0,6.35893440246582
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,24,8,128,1,float16,fp8,0,5.514033508300781
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,24,8,128,1,fp8,fp8,0,5.4707073211669925
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,24,24,128,1,fp8,fp8,0,3.332044982910156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,24,1,128,1,float16,float16,0,2.9000223159790037
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,24,1,128,1,float16,fp8,0,2.505102348327637
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,24,1,128,1,fp8,fp8,0,2.487303924560547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,24,2,128,1,float16,float16,0,2.998948860168457
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,24,2,128,1,float16,fp8,0,2.6027055740356446
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,24,2,128,1,fp8,fp8,0,2.5664432525634764
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,24,4,128,1,float16,float16,0,2.9422048568725585
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,24,4,128,1,float16,fp8,0,2.624660873413086
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,24,4,128,1,fp8,fp8,0,2.7636608123779296
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,24,8,128,1,float16,float16,0,3.176763153076172
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,24,24,128,1,float16,float16,0,1.7779695510864257
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,24,8,128,1,float16,fp8,0,2.7292160034179687
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,24,24,128,1,float16,fp8,0,1.7073375701904296
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,24,1,128,1,float16,float16,0,1.3550895690917968
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,24,24,128,1,fp8,fp8,0,1.7450592041015625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,24,1,128,1,float16,fp8,0,1.298863983154297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,24,8,128,1,fp8,fp8,0,2.9029903411865234
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,24,1,128,1,fp8,fp8,0,1.526640033721924
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,24,2,128,1,float16,float16,0,1.3638768196105957
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,24,2,128,1,float16,fp8,0,1.2778160095214843
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,24,2,128,1,fp8,fp8,0,1.2882800102233887
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,24,4,128,1,float16,float16,0,1.4423392295837403
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,24,4,128,1,float16,fp8,0,1.326524829864502
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,24,4,128,1,fp8,fp8,0,1.3233632087707519
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,24,8,128,1,float16,float16,0,1.473316764831543
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,24,8,128,1,float16,fp8,0,1.4384112358093262
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,24,8,128,1,fp8,fp8,0,1.4413887977600097
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,24,24,128,1,float16,float16,0,0.8945648193359375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,24,24,128,1,float16,fp8,0,0.8515695571899414
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,24,1,128,1,float16,float16,0,0.670527982711792
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,24,24,128,1,fp8,fp8,0,0.9068384170532227
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,24,1,128,1,float16,fp8,0,0.6444352149963379
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,24,1,128,1,fp8,fp8,0,0.6735455989837646
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,24,2,128,1,float16,float16,0,0.6953440189361573
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,24,2,128,1,float16,fp8,0,0.6551119804382324
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,24,2,128,1,fp8,fp8,0,0.6492815971374511
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,24,4,128,1,float16,float16,0,0.7008431911468506
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,24,4,128,1,float16,fp8,0,0.6799183845520019
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,24,4,128,1,fp8,fp8,0,0.6733439922332763
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,24,8,128,1,float16,float16,0,0.7485919952392578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,24,8,128,1,float16,fp8,0,0.7111135959625244
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,24,8,128,1,fp8,fp8,0,0.712988805770874
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,24,1,128,1,float16,fp8,0,5.749854278564453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,24,1,128,1,fp8,fp8,0,5.770904159545898
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,24,1,128,1,float16,float16,0,6.7348579406738285
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,24,2,128,1,float16,fp8,0,5.89696159362793
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,24,2,128,1,fp8,fp8,0,5.904163360595703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,24,2,128,1,float16,float16,0,6.9098960876464846
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,24,4,128,1,float16,float16,0,7.114739227294922
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,24,24,128,1,float16,float16,0,4.53242073059082
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,24,24,128,1,float16,fp8,0,4.185163116455078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,24,4,128,1,float16,fp8,0,6.112966537475586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,24,4,128,1,fp8,fp8,0,6.116862487792969
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,24,24,128,1,fp8,fp8,0,4.243996810913086
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,24,8,128,1,float16,fp8,0,6.54382553100586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,24,8,128,1,fp8,fp8,0,6.5188850402832035
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,24,8,128,1,float16,float16,0,7.534950256347656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,24,1,128,1,float16,fp8,0,2.8716815948486327
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,24,1,128,1,float16,float16,0,3.299622344970703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,24,1,128,1,fp8,fp8,0,2.8944799423217775
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,24,2,128,1,float16,float16,0,3.276367950439453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,24,2,128,1,float16,fp8,0,2.956086349487305
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,24,2,128,1,fp8,fp8,0,2.9494495391845703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,24,4,128,1,float16,float16,0,3.4462047576904298
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,24,4,128,1,float16,fp8,0,3.1396799087524414
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,24,4,128,1,fp8,fp8,0,3.0619888305664062
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,24,24,128,1,float16,float16,0,2.229080009460449
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,24,24,128,1,float16,fp8,0,2.087915229797363
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,24,8,128,1,float16,fp8,0,3.25463981628418
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,24,1,128,1,float16,float16,0,1.5707679748535157
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,24,8,128,1,float16,float16,0,3.6983055114746093
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,24,8,128,1,fp8,fp8,0,3.3557872772216797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,24,24,128,1,fp8,fp8,0,2.177769660949707
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,24,1,128,1,float16,fp8,0,1.4522080421447754
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,24,1,128,1,fp8,fp8,0,1.4807168006896974
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,24,2,128,1,float16,float16,0,1.585763168334961
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,24,2,128,1,float16,fp8,0,1.479747200012207
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,24,2,128,1,fp8,fp8,0,1.5837743759155274
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,24,4,128,1,float16,float16,0,1.6553983688354492
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,24,4,128,1,float16,fp8,0,1.5510160446166992
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,24,4,128,1,fp8,fp8,0,1.5464032173156739
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,24,8,128,1,float16,float16,0,1.792911911010742
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,24,8,128,1,float16,fp8,0,1.6538719177246093
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,24,24,128,1,float16,float16,0,1.1347951889038086
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,24,24,128,1,float16,fp8,0,1.070366382598877
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,24,8,128,1,fp8,fp8,0,1.6944255828857422
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,24,1,128,1,float16,float16,0,0.7691455841064453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,24,24,128,1,fp8,fp8,0,1.0905872344970704
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,24,1,128,1,float16,fp8,0,0.7444831848144531
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,24,1,128,1,fp8,fp8,0,0.786246395111084
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,24,2,128,1,float16,float16,0,0.7925712108612061
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,24,2,128,1,float16,fp8,0,0.7673600196838379
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,24,2,128,1,fp8,fp8,0,0.7588255882263184
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,24,4,128,1,float16,float16,0,0.8238479614257812
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,24,4,128,1,float16,fp8,0,0.8003904342651367
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,24,4,128,1,fp8,fp8,0,0.7870719909667969
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,24,8,128,1,float16,float16,0,0.8843263626098633
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,24,8,128,1,float16,fp8,0,0.8507472038269043
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,24,24,128,1,float16,float16,0,0.5890528202056885
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,24,1,128,1,float16,fp8,0,0.38345119953155515
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,24,8,128,1,fp8,fp8,0,0.839691162109375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,24,24,128,1,float16,fp8,0,0.5444767951965332
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,24,24,128,1,fp8,fp8,0,0.5439263820648194
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,24,1,128,1,float16,float16,0,0.3998016119003296
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,24,4,128,1,float16,float16,0,0.42325119972229003
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,24,1,128,1,fp8,fp8,0,0.38128960132598877
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,24,2,128,1,float16,float16,0,0.4082064151763916
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,24,2,128,1,float16,fp8,0,0.39039199352264403
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,24,2,128,1,fp8,fp8,0,0.3919519901275635
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,24,4,128,1,float16,fp8,0,0.4051519870758057
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,24,4,128,1,fp8,fp8,0,0.40539040565490725
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,24,8,128,1,float16,fp8,0,0.4346144199371338
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,24,8,128,1,float16,float16,0,0.4495071887969971
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,24,8,128,1,fp8,fp8,0,0.43162240982055666
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,24,1,128,1,float16,fp8,0,5.4542591094970705
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,24,1,128,1,fp8,fp8,0,5.444075012207032
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,24,1,128,1,float16,float16,0,6.345366287231445
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,24,2,128,1,float16,fp8,0,5.612273788452148
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,24,2,128,1,fp8,fp8,0,5.613183975219727
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,24,2,128,1,float16,float16,0,6.3613025665283205
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,24,4,128,1,float16,fp8,0,5.863617706298828
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,24,4,128,1,float16,float16,0,6.6310371398925785
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,24,1,128,1,float16,float16,0,3.0930047988891602
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,24,24,128,1,float16,float16,0,4.652137756347656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,24,4,128,1,fp8,fp8,0,5.87982406616211
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,24,24,128,1,float16,fp8,0,4.553803253173828
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,24,24,128,1,fp8,fp8,0,4.368091201782226
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,24,8,128,1,float16,fp8,0,6.4510963439941404
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,24,8,128,1,float16,float16,0,7.302823638916015
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,24,8,128,1,fp8,fp8,0,6.417033386230469
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,24,1,128,1,float16,fp8,0,2.814852714538574
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,24,1,128,1,fp8,fp8,0,2.738148880004883
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,24,2,128,1,float16,fp8,0,2.8067647933959963
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,24,2,128,1,float16,float16,0,3.153228759765625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,24,2,128,1,fp8,fp8,0,2.793142318725586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,24,4,128,1,float16,float16,0,3.299407958984375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,24,4,128,1,float16,fp8,0,2.935862350463867
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,24,4,128,1,fp8,fp8,0,2.9654415130615233
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,24,24,128,1,float16,float16,0,2.3226335525512694
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,24,8,128,1,float16,float16,0,3.575649642944336
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,24,8,128,1,float16,fp8,0,3.2426528930664062
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,24,24,128,1,float16,fp8,0,2.299148750305176
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,24,24,128,1,fp8,fp8,0,2.2021087646484374
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,24,8,128,1,fp8,fp8,0,3.399524688720703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,24,1,128,1,float16,float16,0,1.440392017364502
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,24,1,128,1,float16,fp8,0,1.3990287780761719
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,24,1,128,1,fp8,fp8,0,1.433296012878418
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,24,2,128,1,float16,fp8,0,1.419156837463379
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,24,2,128,1,float16,float16,0,1.500715160369873
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,24,2,128,1,fp8,fp8,0,1.414091205596924
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,24,4,128,1,float16,float16,0,1.5632911682128907
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,24,4,128,1,float16,fp8,0,1.4848608016967773
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,24,4,128,1,fp8,fp8,0,1.5231504440307617
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,24,8,128,1,float16,float16,0,1.7330896377563476
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,24,8,128,1,float16,fp8,0,1.638599967956543
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,24,24,128,1,float16,fp8,0,1.119156837463379
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,24,1,128,1,float16,float16,0,0.7317296028137207
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,24,24,128,1,float16,float16,0,1.1983231544494628
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,24,8,128,1,fp8,fp8,0,1.6357168197631835
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,24,1,128,1,float16,fp8,0,0.7150479793548584
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,24,24,128,1,fp8,fp8,0,1.1368528366088868
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,24,1,128,1,fp8,fp8,0,0.7677231788635254
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,24,2,128,1,float16,float16,0,0.7507311820983886
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,24,2,128,1,float16,fp8,0,0.7228591918945313
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,24,2,128,1,fp8,fp8,0,0.7202688217163086
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,24,4,128,1,float16,float16,0,0.7944143772125244
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,24,4,128,1,float16,fp8,0,0.7615071773529053
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,24,4,128,1,fp8,fp8,0,0.7693327903747559
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,24,8,128,1,float16,float16,0,0.8646063804626465
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,24,8,128,1,float16,fp8,0,0.83963041305542
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,24,8,128,1,fp8,fp8,0,0.836128044128418
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,24,24,128,1,float16,float16,0,0.597492790222168
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,24,24,128,1,float16,fp8,0,0.5686016082763672
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,24,24,128,1,fp8,fp8,0,0.573361587524414
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,24,1,128,1,float16,float16,0,0.38123040199279784
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,24,1,128,1,float16,fp8,0,0.3672559976577759
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,24,1,128,1,fp8,fp8,0,0.37056479454040525
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,24,2,128,1,float16,float16,0,0.3922111988067627
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,24,2,128,1,float16,fp8,0,0.37594239711761473
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,24,2,128,1,fp8,fp8,0,0.37536799907684326
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,24,4,128,1,float16,float16,0,0.40515360832214353
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,24,4,128,1,float16,fp8,0,0.3936847925186157
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,24,4,128,1,fp8,fp8,0,0.39170401096343993
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,24,8,128,1,float16,float16,0,0.4435279846191406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,24,8,128,1,float16,fp8,0,0.4296448230743408
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,24,8,128,1,fp8,fp8,0,0.42838239669799805
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,24,24,128,1,float16,float16,0,0.31048638820648194
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,24,24,128,1,float16,fp8,0,0.2991312026977539
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,24,2,128,1,float16,fp8,0,0.19822399616241454
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,24,24,128,1,fp8,fp8,0,0.2977344036102295
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,24,1,128,1,float16,float16,0,0.20028159618377686
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,24,1,128,1,float16,fp8,0,0.1877344012260437
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,24,1,128,1,fp8,fp8,0,0.19014400243759155
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,24,2,128,1,float16,float16,0,0.20701439380645753
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,24,2,128,1,fp8,fp8,0,0.20002079010009766
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,24,4,128,1,float16,float16,0,0.21618239879608153
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,24,4,128,1,float16,fp8,0,0.21035358905792237
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,24,4,128,1,fp8,fp8,0,0.2062688112258911
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,24,8,128,1,float16,float16,0,0.2355072021484375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,24,8,128,1,float16,fp8,0,0.22299039363861084
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,24,8,128,1,fp8,fp8,0,0.22590079307556152
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,24,1,128,1,float16,fp8,0,3.2755054473876952
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,24,1,128,1,float16,float16,0,3.603134536743164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,24,1,128,1,fp8,fp8,0,3.211431884765625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,24,2,128,1,float16,fp8,0,3.3327518463134767
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,24,2,128,1,float16,float16,0,3.691916656494141
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,24,2,128,1,fp8,fp8,0,3.352403259277344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,24,4,128,1,float16,fp8,0,3.577067184448242
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,24,4,128,1,float16,float16,0,3.9367950439453123
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,24,4,128,1,fp8,fp8,0,3.573369598388672
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,24,1,128,1,float16,float16,0,1.8355119705200196
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,24,24,128,1,float16,float16,0,3.021556854248047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,24,24,128,1,float16,fp8,0,2.875052833557129
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,24,8,128,1,float16,fp8,0,4.029391860961914
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,24,8,128,1,float16,float16,0,4.359016036987304
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,24,8,128,1,fp8,fp8,0,3.994249725341797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,24,24,128,1,fp8,fp8,0,2.8777008056640625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,24,1,128,1,float16,fp8,0,1.6295663833618164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,24,1,128,1,fp8,fp8,0,1.671107292175293
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,24,2,128,1,float16,float16,0,1.7758144378662108
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,24,2,128,1,float16,fp8,0,1.6884576797485351
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,24,2,128,1,fp8,fp8,0,1.6889968872070313
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,24,4,128,1,float16,float16,0,1.9232959747314453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,24,4,128,1,float16,fp8,0,1.7924320220947265
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,24,4,128,1,fp8,fp8,0,1.7994928359985352
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,24,8,128,1,float16,float16,0,2.1227184295654298
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,24,8,128,1,float16,fp8,0,2.023739242553711
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,24,24,128,1,float16,float16,0,1.5228400230407715
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,24,24,128,1,float16,fp8,0,1.4560447692871095
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,24,1,128,1,float16,float16,0,0.8590800285339355
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,24,1,128,1,float16,fp8,0,0.8299167633056641
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,24,8,128,1,fp8,fp8,0,2.0727823257446287
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,24,24,128,1,fp8,fp8,0,1.4918272018432617
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,24,1,128,1,fp8,fp8,0,0.8902480125427246
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,24,2,128,1,float16,float16,0,0.8893712043762207
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,24,2,128,1,float16,fp8,0,0.8553055763244629
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,24,2,128,1,fp8,fp8,0,0.873198413848877
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,24,4,128,1,float16,float16,0,0.9367759704589844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,24,4,128,1,float16,fp8,0,0.915608024597168
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,24,4,128,1,fp8,fp8,0,0.9191823959350586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,24,8,128,1,float16,float16,0,1.0624064445495605
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,24,8,128,1,float16,fp8,0,1.0258496284484864
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,24,8,128,1,fp8,fp8,0,1.017251205444336
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,24,24,128,1,float16,float16,0,0.7763391971588135
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,24,24,128,1,float16,fp8,0,0.7398079872131348
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,24,1,128,1,float16,float16,0,0.4452400207519531
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,24,24,128,1,fp8,fp8,0,0.7422560214996338
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,24,1,128,1,float16,fp8,0,0.4291391849517822
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,24,1,128,1,fp8,fp8,0,0.42966399192810056
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,24,2,128,1,float16,float16,0,0.4567984104156494
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,24,2,128,1,float16,fp8,0,0.44345917701721194
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,24,2,128,1,fp8,fp8,0,0.44285120964050295
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,24,4,128,1,float16,float16,0,0.48656158447265624
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,24,4,128,1,float16,fp8,0,0.4731264114379883
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,24,4,128,1,fp8,fp8,0,0.47340641021728513
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,24,8,128,1,float16,float16,0,0.5400847911834716
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,24,8,128,1,float16,fp8,0,0.5211904048919678
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,24,24,128,1,fp8,fp8,0,0.3827008008956909
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,24,24,128,1,float16,float16,0,0.40268478393554685
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,24,8,128,1,fp8,fp8,0,0.5246592044830323
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,24,24,128,1,float16,fp8,0,0.381276798248291
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,24,1,128,1,float16,float16,0,0.2327359914779663
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,24,1,128,1,float16,fp8,0,0.22735040187835692
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,24,1,128,1,fp8,fp8,0,0.22400319576263428
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,24,2,128,1,float16,float16,0,0.24047679901123048
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,24,2,128,1,float16,fp8,0,0.23450078964233398
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,24,2,128,1,fp8,fp8,0,0.233787202835083
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,24,4,128,1,float16,float16,0,0.25385279655456544
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,24,4,128,1,float16,fp8,0,0.24777278900146485
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,24,4,128,1,fp8,fp8,0,0.250216007232666
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,24,8,128,1,float16,float16,0,0.28377599716186525
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,24,8,128,1,float16,fp8,0,0.27418079376220705
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,24,8,128,1,fp8,fp8,0,0.2744352102279663
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,24,24,128,1,float16,float16,0,0.21731679439544677
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,24,24,128,1,float16,fp8,0,0.20257599353790284
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,24,24,128,1,fp8,fp8,0,0.20323679447174073
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,24,1,128,1,float16,float16,0,0.12765439748764038
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,24,1,128,1,float16,fp8,0,0.12020800113677979
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,24,1,128,1,fp8,fp8,0,0.11896640062332153
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,24,4,128,1,fp8,fp8,0,0.13128639459609986
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,24,2,128,1,float16,float16,0,0.13142720460891724
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,24,2,128,1,float16,fp8,0,0.12350239753723144
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,24,2,128,1,fp8,fp8,0,0.12431360483169555
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,24,4,128,1,float16,float16,0,0.13907519578933716
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,24,4,128,1,float16,fp8,0,0.13068640232086182
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,24,8,128,1,float16,float16,0,0.15575679540634155
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,24,8,128,1,float16,fp8,0,0.14734239578247071
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,24,8,128,1,fp8,fp8,0,0.14541120529174806
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,24,1,128,1,float16,float16,0,3.4374752044677734
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,24,1,128,1,float16,fp8,0,3.188444709777832
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,24,1,128,1,fp8,fp8,0,3.1968704223632813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,24,2,128,1,float16,float16,0,3.603628921508789
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,24,2,128,1,float16,fp8,0,3.3334976196289063
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,24,2,128,1,fp8,fp8,0,3.3611728668212892
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,24,4,128,1,float16,fp8,0,3.6352272033691406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,24,4,128,1,float16,float16,0,3.8418113708496096
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,24,4,128,1,fp8,fp8,0,3.6938846588134764
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,24,1,128,1,float16,float16,0,1.701411247253418
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,24,8,128,1,float16,fp8,0,4.216340637207031
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,24,8,128,1,float16,float16,0,4.492984008789063
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,24,24,128,1,float16,float16,0,3.3850784301757812
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,24,24,128,1,float16,fp8,0,3.2757217407226564
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,24,24,128,1,fp8,fp8,0,3.2675201416015627
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,24,8,128,1,fp8,fp8,0,4.276473617553711
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,24,1,128,1,float16,fp8,0,1.6179807662963868
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,24,1,128,1,fp8,fp8,0,1.667888069152832
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,24,2,128,1,float16,float16,0,1.7610000610351562
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,24,2,128,1,float16,fp8,0,1.7309200286865234
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,24,2,128,1,fp8,fp8,0,1.6929887771606444
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,24,4,128,1,float16,float16,0,1.895742416381836
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,24,4,128,1,float16,fp8,0,1.8337152481079102
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,24,4,128,1,fp8,fp8,0,1.8441808700561524
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,24,8,128,1,float16,float16,0,2.2047311782836916
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,24,8,128,1,float16,fp8,0,2.1337152481079102
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,24,1,128,1,float16,float16,0,0.8430543899536133
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,24,1,128,1,float16,fp8,0,0.824124813079834
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,24,24,128,1,float16,float16,0,1.7125295639038085
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,24,8,128,1,fp8,fp8,0,2.1644256591796873
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,24,24,128,1,float16,fp8,0,1.657771110534668
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,24,24,128,1,fp8,fp8,0,1.6992847442626953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,24,1,128,1,fp8,fp8,0,0.8445296287536621
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,24,2,128,1,float16,float16,0,0.8778304100036621
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,24,2,128,1,float16,fp8,0,0.8549504280090332
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,24,2,128,1,fp8,fp8,0,0.8613663673400879
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,24,4,128,1,float16,float16,0,0.9574799537658691
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,24,4,128,1,float16,fp8,0,0.9316752433776856
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,24,4,128,1,fp8,fp8,0,0.9337056159973145
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,24,8,128,1,float16,float16,0,1.1038479804992676
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,24,8,128,1,float16,fp8,0,1.0782032012939453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,24,1,128,1,float16,float16,0,0.43440799713134765
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,24,24,128,1,float16,float16,0,0.8556271553039551
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,24,8,128,1,fp8,fp8,0,1.078559970855713
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,24,24,128,1,float16,fp8,0,0.8424336433410644
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,24,24,128,1,fp8,fp8,0,0.845206356048584
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,24,1,128,1,float16,fp8,0,0.42423200607299805
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,24,1,128,1,fp8,fp8,0,0.4246784210205078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,24,2,128,1,float16,float16,0,0.45380001068115233
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,24,2,128,1,float16,fp8,0,0.43882079124450685
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,24,2,128,1,fp8,fp8,0,0.4425648212432861
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,24,4,128,1,float16,float16,0,0.48823041915893556
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,24,4,128,1,float16,fp8,0,0.4802703857421875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,24,4,128,1,fp8,fp8,0,0.47936158180236815
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,24,8,128,1,float16,float16,0,0.5635839939117432
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,24,8,128,1,float16,fp8,0,0.5511136054992676
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,24,8,128,1,fp8,fp8,0,0.5473120212554932
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,24,24,128,1,float16,float16,0,0.44608640670776367
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,24,24,128,1,float16,fp8,0,0.4330463886260986
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,24,24,128,1,fp8,fp8,0,0.43509759902954104
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,24,1,128,1,float16,float16,0,0.22866559028625488
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,24,1,128,1,float16,fp8,0,0.22647519111633302
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,24,1,128,1,fp8,fp8,0,0.22650721073150634
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,24,2,128,1,float16,float16,0,0.23747038841247559
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,24,2,128,1,float16,fp8,0,0.23380160331726074
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,24,2,128,1,fp8,fp8,0,0.23362560272216798
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,24,4,128,1,float16,float16,0,0.25814080238342285
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,24,4,128,1,float16,fp8,0,0.25223360061645506
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,24,4,128,1,fp8,fp8,0,0.2541599988937378
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,24,8,128,1,float16,float16,0,0.29463040828704834
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,24,8,128,1,float16,fp8,0,0.28861279487609864
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,24,8,128,1,fp8,fp8,0,0.28597440719604494
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,24,24,128,1,float16,float16,0,0.23659999370574952
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,24,24,128,1,float16,fp8,0,0.2272752046585083
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,24,24,128,1,fp8,fp8,0,0.2291343927383423
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,24,1,128,1,float16,float16,0,0.12492640018463134
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,24,1,128,1,float16,fp8,0,0.11916799545288086
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,24,1,128,1,fp8,fp8,0,0.11918400526046753
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,24,2,128,1,float16,float16,0,0.13201279640197755
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,24,2,128,1,float16,fp8,0,0.12509759664535522
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,24,2,128,1,fp8,fp8,0,0.12682080268859863
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,24,4,128,1,float16,float16,0,0.139520001411438
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,24,4,128,1,float16,fp8,0,0.13743679523468016
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,24,4,128,1,fp8,fp8,0,0.1339951992034912
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,24,8,128,1,float16,float16,0,0.15953919887542725
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,24,8,128,1,float16,fp8,0,0.15572479963302613
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,24,8,128,1,fp8,fp8,0,0.15567200183868407
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,24,24,128,1,float16,float16,0,0.13408639430999755
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,24,24,128,1,float16,fp8,0,0.12701599597930907
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,24,24,128,1,fp8,fp8,0,0.12534400224685668
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,24,1,128,1,float16,float16,0,0.0709231972694397
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,24,1,128,1,float16,fp8,0,0.06832640171051026
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,24,1,128,1,fp8,fp8,0,0.06909120082855225
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,24,2,128,1,float16,float16,0,0.07186239957809448
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,24,2,128,1,float16,fp8,0,0.06987839937210083
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,24,2,128,1,fp8,fp8,0,0.06972000002861023
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,24,4,128,1,float16,float16,0,0.07793599963188172
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,24,4,128,1,float16,fp8,0,0.07464640140533448
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,24,4,128,1,fp8,fp8,0,0.07448319792747497
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,24,8,128,1,float16,float16,0,0.08773599863052368
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,24,8,128,1,float16,fp8,0,0.082150399684906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,24,8,128,1,fp8,fp8,0,0.08264639973640442
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,24,1,128,1,float16,float16,0,2.0307279586791993
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,24,1,128,1,float16,fp8,0,2.0047311782836914
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,24,1,128,1,fp8,fp8,0,1.9924543380737305
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,24,2,128,1,float16,float16,0,2.1600671768188477
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,24,2,128,1,float16,fp8,0,2.099943923950195
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,24,2,128,1,fp8,fp8,0,2.096932792663574
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,24,4,128,1,float16,float16,0,2.356831932067871
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,24,4,128,1,float16,fp8,0,2.3407871246337892
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,24,4,128,1,fp8,fp8,0,2.378096008300781
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,24,8,128,1,float16,float16,0,2.80372314453125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,24,1,128,1,float16,float16,0,1.0342896461486817
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,24,8,128,1,float16,fp8,0,2.7623664855957033
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,24,24,128,1,float16,float16,0,2.3206384658813475
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,24,24,128,1,float16,fp8,0,2.2577888488769533
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,24,8,128,1,fp8,fp8,0,2.866223907470703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,24,1,128,1,float16,fp8,0,1.0158495903015137
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,24,24,128,1,fp8,fp8,0,2.2979503631591798
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,24,1,128,1,fp8,fp8,0,1.0291983604431152
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,24,2,128,1,float16,float16,0,1.0853487968444824
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,24,2,128,1,fp8,fp8,0,1.0561488151550293
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,24,2,128,1,float16,fp8,0,1.0636639595031738
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,24,4,128,1,float16,float16,0,1.1869520187377929
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,24,4,128,1,float16,fp8,0,1.1829584121704102
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,24,4,128,1,fp8,fp8,0,1.182960033416748
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,24,8,128,1,float16,float16,0,1.4189536094665527
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,24,8,128,1,float16,fp8,0,1.4017760276794433
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,24,1,128,1,float16,float16,0,0.5265279769897461
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,24,24,128,1,float16,float16,0,1.1678576469421387
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,24,8,128,1,fp8,fp8,0,1.4005552291870118
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,24,24,128,1,float16,fp8,0,1.1455327987670898
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,24,1,128,1,float16,fp8,0,0.5218527793884278
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,24,24,128,1,fp8,fp8,0,1.1556320190429688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,24,1,128,1,fp8,fp8,0,0.5209792137145997
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,24,2,128,1,float16,float16,0,0.5537024021148682
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,24,2,128,1,float16,fp8,0,0.5479184150695801
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,24,2,128,1,fp8,fp8,0,0.5397712230682373
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,24,4,128,1,float16,float16,0,0.6105679988861084
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,24,4,128,1,float16,fp8,0,0.5985936164855957
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,24,4,128,1,fp8,fp8,0,0.6048031806945801
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,24,8,128,1,float16,float16,0,0.7210639953613281
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,24,8,128,1,float16,fp8,0,0.7136735916137695
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,24,24,128,1,float16,float16,0,0.5984367847442627
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,24,8,128,1,fp8,fp8,0,0.71244797706604
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,24,24,128,1,float16,fp8,0,0.5851391792297364
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,24,1,128,1,float16,float16,0,0.2736304044723511
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,24,24,128,1,fp8,fp8,0,0.5905439853668213
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,24,1,128,1,float16,fp8,0,0.2720799922943115
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,24,1,128,1,fp8,fp8,0,0.2728111982345581
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,24,2,128,1,float16,float16,0,0.28978240489959717
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,24,4,128,1,float16,fp8,0,0.31396639347076416
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,24,2,128,1,float16,fp8,0,0.2862623929977417
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,24,2,128,1,fp8,fp8,0,0.28365440368652345
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,24,4,128,1,float16,float16,0,0.31629600524902346
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,24,4,128,1,fp8,fp8,0,0.31295840740203856
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,24,8,128,1,float16,float16,0,0.37694559097290037
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,24,8,128,1,float16,fp8,0,0.3691663980484009
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,24,8,128,1,fp8,fp8,0,0.36820800304412843
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,24,24,128,1,float16,float16,0,0.3124351978302002
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,24,24,128,1,float16,fp8,0,0.3107615947723389
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,24,24,128,1,fp8,fp8,0,0.30838239192962646
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,24,1,128,1,float16,float16,0,0.1480288028717041
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,24,1,128,1,float16,fp8,0,0.1484928011894226
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,24,1,128,1,fp8,fp8,0,0.14653120040893555
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,24,2,128,1,float16,float16,0,0.15802240371704102
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,24,2,128,1,float16,fp8,0,0.15536799430847167
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,24,2,128,1,fp8,fp8,0,0.15409280061721803
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,24,4,128,1,float16,float16,0,0.16964800357818605
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,24,4,128,1,float16,fp8,0,0.16826080083847045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,24,4,128,1,fp8,fp8,0,0.16884000301361085
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,24,8,128,1,float16,float16,0,0.19918240308761598
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,24,8,128,1,float16,fp8,0,0.195360004901886
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,24,8,128,1,fp8,fp8,0,0.19602240324020387
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,24,24,128,1,float16,float16,0,0.16807999610900878
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,24,24,128,1,float16,fp8,0,0.16953279972076415
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,24,24,128,1,fp8,fp8,0,0.16843680143356324
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,24,1,128,1,float16,float16,0,0.08447840213775634
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,24,1,128,1,float16,fp8,0,0.07909119725227357
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,24,1,128,1,fp8,fp8,0,0.07947999835014344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,24,2,128,1,float16,float16,0,0.08641600012779235
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,24,2,128,1,float16,fp8,0,0.08180800080299377
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,24,2,128,1,fp8,fp8,0,0.08193439841270447
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,24,4,128,1,float16,float16,0,0.09604480266571044
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,24,4,128,1,float16,fp8,0,0.09127519726753235
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,24,4,128,1,fp8,fp8,0,0.09076640009880066
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,24,8,128,1,float16,float16,0,0.10959039926528931
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,24,8,128,1,float16,fp8,0,0.10539679527282715
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,24,8,128,1,fp8,fp8,0,0.10565760135650634
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,24,24,128,1,float16,float16,0,0.09915680289268494
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,24,24,128,1,float16,fp8,0,0.09085919857025146
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,24,24,128,1,fp8,fp8,0,0.09174879789352416
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,24,1,128,1,float16,float16,0,0.05134080052375793
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,24,1,128,1,float16,fp8,0,0.050809597969055174
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,24,1,128,1,fp8,fp8,0,0.05044159889221191
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,24,2,128,1,float16,float16,0,0.051800000667572024
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,24,2,128,1,float16,fp8,0,0.0517520010471344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,24,2,128,1,fp8,fp8,0,0.05133919715881348
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,24,4,128,1,float16,float16,0,0.05472639799118042
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,24,4,128,1,float16,fp8,0,0.05375199913978577
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,24,4,128,1,fp8,fp8,0,0.053548800945281985
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,24,8,128,1,float16,float16,0,0.06238880157470703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,24,8,128,1,float16,fp8,0,0.06000480055809021
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,24,8,128,1,fp8,fp8,0,0.060190397500991824
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,24,1,128,1,float16,float16,0,2.1110912322998048
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,24,1,128,1,float16,fp8,0,2.10620002746582
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,24,1,128,1,fp8,fp8,0,2.1221439361572267
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,24,2,128,1,float16,float16,0,2.254670333862305
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,24,2,128,1,float16,fp8,0,2.2541423797607423
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,24,2,128,1,fp8,fp8,0,2.2596559524536133
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,24,4,128,1,float16,float16,0,2.5725168228149413
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,24,4,128,1,float16,fp8,0,2.562118339538574
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,24,4,128,1,fp8,fp8,0,2.583729553222656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,24,1,128,1,float16,float16,0,1.078932762145996
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,24,8,128,1,float16,float16,0,3.166201591491699
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,24,8,128,1,float16,fp8,0,3.158051109313965
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,24,24,128,1,float16,float16,0,2.7813743591308593
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,24,8,128,1,fp8,fp8,0,3.167671966552734
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,24,24,128,1,float16,fp8,0,2.7359216690063475
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,24,1,128,1,float16,fp8,0,1.0786128044128418
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,24,1,128,1,fp8,fp8,0,1.077787208557129
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,24,24,128,1,fp8,fp8,0,2.786185646057129
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,24,2,128,1,float16,float16,0,1.1625840187072753
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,24,2,128,1,float16,fp8,0,1.1380144119262696
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,24,2,128,1,fp8,fp8,0,1.1395999908447265
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,24,4,128,1,float16,float16,0,1.2990927696228027
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,24,4,128,1,float16,fp8,0,1.2901087760925294
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,24,4,128,1,fp8,fp8,0,1.2985456466674805
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,24,8,128,1,float16,float16,0,1.5913215637207032
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,24,8,128,1,float16,fp8,0,1.592078399658203
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,24,1,128,1,float16,float16,0,0.5505824089050293
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,24,8,128,1,fp8,fp8,0,1.5920944213867188
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,24,24,128,1,fp8,fp8,0,1.3909664154052734
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,24,24,128,1,float16,float16,0,1.3998224258422851
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,24,1,128,1,float16,fp8,0,0.5509200096130371
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,24,24,128,1,float16,fp8,0,1.3815695762634277
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,24,1,128,1,fp8,fp8,0,0.5501200199127197
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,24,2,128,1,float16,float16,0,0.5858751773834229
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,24,2,128,1,float16,fp8,0,0.5841807842254638
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,24,2,128,1,fp8,fp8,0,0.5855535984039306
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,24,4,128,1,float16,float16,0,0.6629615783691406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,24,8,128,1,float16,float16,0,0.8064528465270996
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,24,4,128,1,float16,fp8,0,0.6552944183349609
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,24,4,128,1,fp8,fp8,0,0.6612271785736084
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,24,8,128,1,float16,fp8,0,0.8054256439208984
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,24,8,128,1,fp8,fp8,0,0.8083951950073243
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,24,24,128,1,float16,float16,0,0.7178431987762451
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,24,24,128,1,float16,fp8,0,0.7097280025482178
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,24,1,128,1,float16,float16,0,0.2854399919509888
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,24,24,128,1,fp8,fp8,0,0.7053552150726319
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,24,1,128,1,float16,fp8,0,0.2883039951324463
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,24,1,128,1,fp8,fp8,0,0.2864432096481323
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,24,2,128,1,float16,float16,0,0.30570240020751954
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,24,2,128,1,float16,fp8,0,0.30483360290527345
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,24,2,128,1,fp8,fp8,0,0.306659197807312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,24,4,128,1,float16,float16,0,0.34096479415893555
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,24,4,128,1,float16,fp8,0,0.34054079055786135
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,24,4,128,1,fp8,fp8,0,0.3431936025619507
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,24,8,128,1,float16,float16,0,0.41780638694763184
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,24,24,128,1,float16,fp8,0,0.36619999408721926
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,24,8,128,1,float16,fp8,0,0.41608481407165526
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,24,8,128,1,fp8,fp8,0,0.41425437927246095
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,24,24,128,1,float16,float16,0,0.3749680042266846
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,24,2,128,1,float16,float16,0,0.1655184030532837
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,24,24,128,1,fp8,fp8,0,0.3667471885681152
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,24,1,128,1,float16,float16,0,0.15828800201416016
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,24,1,128,1,float16,fp8,0,0.15762399435043334
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,24,1,128,1,fp8,fp8,0,0.15692479610443116
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,24,2,128,1,float16,fp8,0,0.16556639671325685
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,24,2,128,1,fp8,fp8,0,0.16501439809799195
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,24,4,128,1,float16,float16,0,0.18400319814682006
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,24,4,128,1,float16,fp8,0,0.18316639661788942
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,24,4,128,1,fp8,fp8,0,0.18386240005493165
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,24,8,128,1,float16,float16,0,0.22215039730072023
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,24,8,128,1,float16,fp8,0,0.22076640129089356
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,24,8,128,1,fp8,fp8,0,0.2198080062866211
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,24,24,128,1,float16,float16,0,0.20132160186767578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,24,24,128,1,float16,fp8,0,0.19677120447158813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,24,24,128,1,fp8,fp8,0,0.1970703959465027
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,24,1,128,1,float16,float16,0,0.08926079869270324
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,24,1,128,1,float16,fp8,0,0.08511840105056763
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,24,1,128,1,fp8,fp8,0,0.08667200207710266
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,24,2,128,1,float16,float16,0,0.09638239741325379
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,24,2,128,1,float16,fp8,0,0.09203839898109437
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,24,2,128,1,fp8,fp8,0,0.09378560185432434
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,24,4,128,1,float16,float16,0,0.10410239696502685
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,24,4,128,1,float16,fp8,0,0.10192159414291382
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,24,4,128,1,fp8,fp8,0,0.10228639841079712
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,24,8,128,1,float16,float16,0,0.12279360294342041
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,24,8,128,1,float16,fp8,0,0.12235360145568848
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,24,8,128,1,fp8,fp8,0,0.12269760370254516
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,24,24,128,1,float16,float16,0,0.1137887954711914
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,24,24,128,1,float16,fp8,0,0.11071360111236572
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,24,24,128,1,fp8,fp8,0,0.1098863959312439
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,24,1,128,1,float16,float16,0,0.04965440034866333
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,24,1,128,1,float16,fp8,0,0.0508575975894928
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,24,1,128,1,fp8,fp8,0,0.051528000831604005
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,24,2,128,1,float16,float16,0,0.05174559950828552
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,24,2,128,1,float16,fp8,0,0.05221279859542847
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,24,2,128,1,fp8,fp8,0,0.051963198184967044
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,24,4,128,1,float16,float16,0,0.057974398136138916
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,24,4,128,1,float16,fp8,0,0.05755199790000916
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,24,4,128,1,fp8,fp8,0,0.05749599933624268
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,24,8,128,1,float16,float16,0,0.06871680021286011
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,24,8,128,1,float16,fp8,0,0.06584960222244263
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,24,8,128,1,fp8,fp8,0,0.06569600105285645
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,24,24,128,1,float16,float16,0,0.06155359745025635
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,24,24,128,1,float16,fp8,0,0.06077759861946106
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,24,24,128,1,fp8,fp8,0,0.060438400506973265
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,24,1,128,1,float16,float16,0,0.03416320085525513
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,24,1,128,1,float16,fp8,0,0.03489600121974945
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,24,1,128,1,fp8,fp8,0,0.03479360044002533
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,24,2,128,1,float16,float16,0,0.034564799070358275
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,24,2,128,1,float16,fp8,0,0.035488000512123107
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,24,2,128,1,fp8,fp8,0,0.03510879874229431
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,24,4,128,1,float16,float16,0,0.03529120087623596
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,24,4,128,1,float16,fp8,0,0.037118399143218996
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,24,4,128,1,fp8,fp8,0,0.03720319867134094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,24,8,128,1,float16,float16,0,0.03964959979057312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,24,8,128,1,float16,fp8,0,0.04184800088405609
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,24,8,128,1,fp8,fp8,0,0.04242079854011536
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,24,1,128,1,float16,float16,0,1.5235983848571777
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,24,1,128,1,float16,fp8,0,1.562828826904297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,24,1,128,1,fp8,fp8,0,1.5524592399597168
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,24,2,128,1,float16,float16,0,1.6634511947631836
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,24,2,128,1,float16,fp8,0,1.696308708190918
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,24,2,128,1,fp8,fp8,0,1.692977523803711
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,24,4,128,1,float16,float16,0,1.965238380432129
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,24,4,128,1,float16,fp8,0,1.9978752136230469
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,24,4,128,1,fp8,fp8,0,2.0058656692504884
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,24,8,128,1,float16,float16,0,2.5620304107666017
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,24,1,128,1,float16,float16,0,0.7738304138183594
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,24,1,128,1,float16,fp8,0,0.7931168079376221
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,24,8,128,1,float16,fp8,0,2.593219184875488
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,24,8,128,1,fp8,fp8,0,2.593684768676758
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,24,24,128,1,float16,float16,0,2.4776432037353517
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,24,24,128,1,float16,fp8,0,2.4783424377441405
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,24,1,128,1,fp8,fp8,0,0.7934703826904297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,24,2,128,1,float16,float16,0,0.846735954284668
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,24,24,128,1,fp8,fp8,0,2.4838815689086915
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,24,2,128,1,float16,fp8,0,0.8630543708801269
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,24,2,128,1,fp8,fp8,0,0.8608464241027832
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,24,4,128,1,float16,float16,0,0.9972880363464356
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,24,4,128,1,float16,fp8,0,1.0107600212097168
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,24,4,128,1,fp8,fp8,0,1.0108336448669433
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,24,8,128,1,float16,float16,0,1.2936240196228028
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,24,8,128,1,float16,fp8,0,1.3090720176696777
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,24,8,128,1,fp8,fp8,0,1.3082127571105957
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,24,1,128,1,float16,float16,0,0.39829120635986326
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,24,24,128,1,float16,float16,0,1.25307035446167
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,24,1,128,1,float16,fp8,0,0.41001119613647463
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,24,24,128,1,float16,fp8,0,1.2523920059204101
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,24,1,128,1,fp8,fp8,0,0.40796961784362795
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,24,2,128,1,float16,float16,0,0.4345248222351074
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,24,24,128,1,fp8,fp8,0,1.2532640457153321
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,24,2,128,1,float16,fp8,0,0.44160637855529783
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,24,2,128,1,fp8,fp8,0,0.4415616035461426
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,24,4,128,1,float16,float16,0,0.5112239837646484
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,24,4,128,1,float16,fp8,0,0.5166704177856445
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,24,4,128,1,fp8,fp8,0,0.5167471885681152
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,24,8,128,1,float16,float16,0,0.6572000026702881
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,24,8,128,1,float16,fp8,0,0.6658192157745362
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,24,8,128,1,fp8,fp8,0,0.6644720077514649
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,24,24,128,1,float16,float16,0,0.6410751819610596
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,24,24,128,1,float16,fp8,0,0.6407135963439942
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,24,1,128,1,float16,float16,0,0.21223680973052977
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,24,24,128,1,fp8,fp8,0,0.6401360034942627
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,24,1,128,1,float16,fp8,0,0.21566400527954102
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,24,1,128,1,fp8,fp8,0,0.21598560810089112
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,24,2,128,1,float16,float16,0,0.22952160835266114
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,24,2,128,1,float16,fp8,0,0.23347039222717286
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,24,2,128,1,fp8,fp8,0,0.23419361114501952
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,24,4,128,1,float16,float16,0,0.26766400337219237
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,24,4,128,1,float16,fp8,0,0.2705343961715698
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,24,4,128,1,fp8,fp8,0,0.269868803024292
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,24,8,128,1,float16,float16,0,0.34251039028167723
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,24,8,128,1,float16,fp8,0,0.3437311887741089
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,24,8,128,1,fp8,fp8,0,0.34314560890197754
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,24,24,128,1,float16,float16,0,0.33277599811553954
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,24,1,128,1,float16,float16,0,0.1186560034751892
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,24,24,128,1,float16,fp8,0,0.3324912071228027
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,24,1,128,1,float16,fp8,0,0.12105439901351929
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,24,4,128,1,float16,float16,0,0.1457808017730713
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,24,24,128,1,fp8,fp8,0,0.3324959993362427
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,24,1,128,1,fp8,fp8,0,0.12088639736175537
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,24,2,128,1,float16,float16,0,0.12581119537353516
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,24,2,128,1,float16,fp8,0,0.12797919511795045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,24,2,128,1,fp8,fp8,0,0.12921760082244874
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,24,4,128,1,float16,fp8,0,0.14726560115814208
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,24,4,128,1,fp8,fp8,0,0.14680320024490356
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,24,8,128,1,float16,float16,0,0.18212000131607056
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,24,8,128,1,float16,fp8,0,0.18439040184020997
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,24,8,128,1,fp8,fp8,0,0.18349920511245726
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,24,24,128,1,float16,float16,0,0.17832640409469605
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,24,24,128,1,float16,fp8,0,0.17788959741592408
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,24,24,128,1,fp8,fp8,0,0.17893439531326294
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,24,1,128,1,float16,float16,0,0.06992160081863404
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,24,1,128,1,float16,fp8,0,0.06788319945335389
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,24,1,128,1,fp8,fp8,0,0.0679967999458313
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,24,2,128,1,float16,float16,0,0.07561759948730469
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,24,2,128,1,float16,fp8,0,0.07489280104637146
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,24,2,128,1,fp8,fp8,0,0.07366880178451538
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,24,8,128,1,fp8,fp8,0,0.10423200130462647
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,24,4,128,1,float16,float16,0,0.08390880227088929
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,24,4,128,1,float16,fp8,0,0.08420000076293946
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,24,4,128,1,fp8,fp8,0,0.08448960185050965
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,24,8,128,1,float16,float16,0,0.10249600410461426
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,24,8,128,1,float16,fp8,0,0.10451200008392333
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,24,24,128,1,float16,float16,0,0.10088640451431274
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,24,24,128,1,float16,fp8,0,0.09845439791679382
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,24,24,128,1,fp8,fp8,0,0.0990880012512207
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,24,1,128,1,float16,float16,0,0.04278239905834198
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,24,1,128,1,float16,fp8,0,0.041808000206947325
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,24,1,128,1,fp8,fp8,0,0.04190559983253479
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,24,2,128,1,float16,float16,0,0.04316000044345856
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,24,2,128,1,float16,fp8,0,0.04275520145893097
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,24,2,128,1,fp8,fp8,0,0.04302079975605011
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,24,4,128,1,float16,float16,0,0.048547199368476866
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,24,4,128,1,float16,fp8,0,0.048495998978614806
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,24,4,128,1,fp8,fp8,0,0.048644798994064334
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,24,8,128,1,float16,float16,0,0.06032639741897583
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,24,8,128,1,float16,fp8,0,0.056467199325561525
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,24,8,128,1,fp8,fp8,0,0.056462401151657106
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,24,24,128,1,float16,float16,0,0.05851680040359497
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,24,24,128,1,float16,fp8,0,0.056811201572418216
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,24,24,128,1,fp8,fp8,0,0.056676799058914186
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,24,1,128,1,float16,float16,0,0.030811199545860292
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,24,1,128,1,float16,fp8,0,0.030715200304985046
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,24,1,128,1,fp8,fp8,0,0.03081279993057251
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,24,2,128,1,float16,float16,0,0.030943998694419862
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,24,2,128,1,float16,fp8,0,0.031112000346183777
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,24,2,128,1,fp8,fp8,0,0.03115679919719696
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,24,4,128,1,float16,float16,0,0.03191519975662231
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,24,4,128,1,float16,fp8,0,0.033267199993133545
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,24,4,128,1,fp8,fp8,0,0.03378399908542633
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,24,8,128,1,float16,float16,0,0.0363072007894516
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,24,8,128,1,float16,fp8,0,0.038308799266815186
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,24,8,128,1,fp8,fp8,0,0.03852640092372894
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,24,24,128,1,float16,float16,0,0.0368800014257431
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,24,24,128,1,float16,fp8,0,0.03862079977989197
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,24,24,128,1,fp8,fp8,0,0.0385343998670578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,24,1,128,1,float16,float16,0,0.027350398898124694
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,24,1,128,1,float16,fp8,0,0.028747200965881348
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,24,1,128,1,fp8,fp8,0,0.028604799509048463
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,24,2,128,1,float16,float16,0,0.02755039930343628
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,24,2,128,1,float16,fp8,0,0.029241600632667543
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,24,2,128,1,fp8,fp8,0,0.02836799919605255
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,24,4,128,1,float16,float16,0,0.027702400088310243
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,24,4,128,1,float16,fp8,0,0.02884480059146881
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,24,4,128,1,fp8,fp8,0,0.02933120131492615
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,24,8,128,1,float16,float16,0,0.028531199693679808
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,24,8,128,1,float16,fp8,0,0.03048959970474243
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,24,8,128,1,fp8,fp8,0,0.030529600381851197
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,24,1,128,1,float16,float16,0,0.6289072036743164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,24,1,128,1,float16,fp8,0,0.6563759803771972
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,24,1,128,1,fp8,fp8,0,0.655460786819458
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,24,2,128,1,float16,float16,0,0.7044447898864746
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,24,2,128,1,float16,fp8,0,0.7271008014678955
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,24,2,128,1,fp8,fp8,0,0.7300015926361084
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,24,4,128,1,float16,float16,0,0.8506447792053222
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,24,4,128,1,float16,fp8,0,0.8811519622802735
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,24,4,128,1,fp8,fp8,0,0.8792271614074707
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,24,8,128,1,float16,float16,0,1.148799991607666
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,24,8,128,1,float16,fp8,0,1.1809552192687989
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,24,8,128,1,fp8,fp8,0,1.1789055824279786
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,24,1,128,1,float16,float16,0,0.3280128002166748
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,24,24,128,1,float16,float16,0,1.1827199935913086
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,24,1,128,1,float16,fp8,0,0.3413952112197876
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,24,24,128,1,float16,fp8,0,1.1657983779907226
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,24,1,128,1,fp8,fp8,0,0.3432607889175415
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,24,24,128,1,fp8,fp8,0,1.1664928436279296
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,24,2,128,1,float16,float16,0,0.3640415906906128
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,24,2,128,1,float16,fp8,0,0.37730720043182375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,24,2,128,1,fp8,fp8,0,0.3779119968414307
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,24,4,128,1,float16,float16,0,0.4363696098327637
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,24,4,128,1,float16,fp8,0,0.4508815765380859
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,24,4,128,1,fp8,fp8,0,0.45258560180664065
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,24,8,128,1,float16,float16,0,0.5882287979125976
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,24,8,128,1,float16,fp8,0,0.6011487960815429
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,24,8,128,1,fp8,fp8,0,0.6009744167327881
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,24,24,128,1,float16,float16,0,0.6050191879272461
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,24,24,128,1,float16,fp8,0,0.5938975811004639
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,24,1,128,1,float16,float16,0,0.1768447995185852
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,24,24,128,1,fp8,fp8,0,0.5960095882415771
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,24,1,128,1,float16,fp8,0,0.18284319639205932
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,24,1,128,1,fp8,fp8,0,0.18360799551010132
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,24,2,128,1,float16,float16,0,0.19431040287017823
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,24,2,128,1,float16,fp8,0,0.20096321105957032
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,24,2,128,1,fp8,fp8,0,0.2020944118499756
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,24,4,128,1,float16,float16,0,0.23054559230804444
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,24,4,128,1,float16,fp8,0,0.23928000926971435
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,24,4,128,1,fp8,fp8,0,0.2386255979537964
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,24,8,128,1,float16,float16,0,0.3057391881942749
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,24,8,128,1,float16,fp8,0,0.3111920118331909
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,24,8,128,1,fp8,fp8,0,0.3111743927001953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,24,24,128,1,float16,float16,0,0.31507039070129395
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,24,24,128,1,float16,fp8,0,0.309771203994751
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,24,24,128,1,fp8,fp8,0,0.30926880836486814
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,24,1,128,1,float16,float16,0,0.10069760084152221
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,24,1,128,1,float16,fp8,0,0.10514559745788574
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,24,4,128,1,float16,fp8,0,0.1312608003616333
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,24,1,128,1,fp8,fp8,0,0.10476800203323364
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,24,2,128,1,float16,float16,0,0.10810240507125854
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,24,2,128,1,float16,fp8,0,0.11260639429092408
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,24,2,128,1,fp8,fp8,0,0.1132207989692688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,24,4,128,1,float16,float16,0,0.12748479843139648
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,24,4,128,1,fp8,fp8,0,0.13133920431137086
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,24,8,128,1,float16,float16,0,0.16384799480438234
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,24,8,128,1,float16,fp8,0,0.1673151969909668
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,24,8,128,1,fp8,fp8,0,0.1669919967651367
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,24,24,128,1,float16,float16,0,0.16939519643783568
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,24,24,128,1,float16,fp8,0,0.16558239459991456
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,24,24,128,1,fp8,fp8,0,0.16628799438476563
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,24,1,128,1,float16,float16,0,0.060759997367858885
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,24,1,128,1,float16,fp8,0,0.060452800989151
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,24,1,128,1,fp8,fp8,0,0.05897120237350464
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,24,2,128,1,float16,float16,0,0.06678079962730407
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,24,2,128,1,float16,fp8,0,0.06537920236587524
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,24,2,128,1,fp8,fp8,0,0.06678239703178405
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,24,4,128,1,float16,float16,0,0.07491999864578247
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,24,4,128,1,float16,fp8,0,0.0752784013748169
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,24,4,128,1,fp8,fp8,0,0.07559040188789368
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,24,8,128,1,float16,float16,0,0.09366239905357361
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,24,8,128,1,float16,fp8,0,0.09581120014190674
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,24,8,128,1,fp8,fp8,0,0.0953279972076416
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,24,24,128,1,float16,float16,0,0.09524639844894409
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,24,24,128,1,float16,fp8,0,0.09131360054016113
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,24,24,128,1,fp8,fp8,0,0.09150879979133605
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,24,1,128,1,float16,float16,0,0.03732160031795502
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,24,1,128,1,float16,fp8,0,0.03819839954376221
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,24,1,128,1,fp8,fp8,0,0.038108798861503604
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,24,2,128,1,float16,float16,0,0.03834559917449951
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,24,2,128,1,float16,fp8,0,0.039166399836540224
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,24,2,128,1,fp8,fp8,0,0.03899520039558411
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,24,4,128,1,float16,float16,0,0.04398559927940369
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,24,4,128,1,float16,fp8,0,0.04381119906902313
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,24,4,128,1,fp8,fp8,0,0.04396319985389709
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,24,8,128,1,float16,float16,0,0.05477920174598694
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,24,8,128,1,float16,fp8,0,0.05299199819564819
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,24,8,128,1,fp8,fp8,0,0.052908802032470705
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,24,24,128,1,float16,float16,0,0.05283520221710205
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,24,24,128,1,float16,fp8,0,0.0508512020111084
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,24,24,128,1,fp8,fp8,0,0.05124639868736267
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,24,1,128,1,float16,float16,0,0.02874560058116913
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,24,1,128,1,float16,fp8,0,0.030531200766563415
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,24,1,128,1,fp8,fp8,0,0.030379199981689455
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,24,2,128,1,float16,float16,0,0.029096001386642457
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,24,2,128,1,float16,fp8,0,0.030929601192474364
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,24,2,128,1,fp8,fp8,0,0.030806401371955873
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,24,4,128,1,float16,float16,0,0.029924800992012023
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,24,4,128,1,float16,fp8,0,0.03186880052089691
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,24,4,128,1,fp8,fp8,0,0.031856000423431396
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,24,8,128,1,float16,float16,0,0.03458240032196045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,24,8,128,1,float16,fp8,0,0.03651680052280426
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,24,8,128,1,fp8,fp8,0,0.036657598614692685
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,24,24,128,1,float16,float16,0,0.033899199962615964
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,24,24,128,1,float16,fp8,0,0.034760001301765445
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,24,24,128,1,fp8,fp8,0,0.03462719917297363
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,24,1,128,1,float16,float16,0,0.024318400025367736
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,24,1,128,1,float16,fp8,0,0.025367999076843263
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,24,1,128,1,fp8,fp8,0,0.02542400062084198
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,24,2,128,1,float16,float16,0,0.02442079931497574
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,24,2,128,1,float16,fp8,0,0.025278401374816895
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,24,2,128,1,fp8,fp8,0,0.025147199630737305
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,24,4,128,1,float16,float16,0,0.0245728000998497
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,24,4,128,1,float16,fp8,0,0.02555519938468933
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,24,4,128,1,fp8,fp8,0,0.02558560073375702
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,24,8,128,1,float16,float16,0,0.025723201036453248
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,24,8,128,1,float16,fp8,0,0.026617598533630372
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,24,8,128,1,fp8,fp8,0,0.02640160024166107
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,24,24,128,1,float16,float16,0,0.026316800713539125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,24,24,128,1,float16,fp8,0,0.02720640003681183
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,24,24,128,1,fp8,fp8,0,0.027502399682998658
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,24,1,128,1,float16,float16,0,0.023151999711990355
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,24,1,128,1,float16,fp8,0,0.023814399540424348
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,24,1,128,1,fp8,fp8,0,0.023963199555873872
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,24,2,128,1,float16,float16,0,0.023160000145435334
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,24,2,128,1,float16,fp8,0,0.023945599794387817
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,24,2,128,1,fp8,fp8,0,0.024164800345897675
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,24,4,128,1,float16,float16,0,0.02306559979915619
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,24,4,128,1,fp8,fp8,0,0.024099199473857878
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,24,4,128,1,float16,fp8,0,0.023929600417613984
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,24,8,128,1,float16,float16,0,0.023230400681495667
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,24,8,128,1,float16,fp8,0,0.024486400187015533
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,24,8,128,1,fp8,fp8,0,0.0243599995970726
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,24,1,128,1,float16,float16,0,0.3098655939102173
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,24,1,128,1,float16,fp8,0,0.32892799377441406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,24,1,128,1,fp8,fp8,0,0.32923839092254636
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,24,2,128,1,float16,float16,0,0.344702410697937
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,24,2,128,1,float16,fp8,0,0.3655888080596924
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,24,2,128,1,fp8,fp8,0,0.36836159229278564
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,24,4,128,1,float16,float16,0,0.4151616096496582
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,24,4,128,1,float16,fp8,0,0.43995680809021
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,24,4,128,1,fp8,fp8,0,0.44068479537963867
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,24,8,128,1,float16,float16,0,0.5620207786560059
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,24,8,128,1,float16,fp8,0,0.588646411895752
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,24,8,128,1,fp8,fp8,0,0.5889664173126221
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,24,24,128,1,float16,float16,0,0.5953423976898193
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,24,24,128,1,float16,fp8,0,0.5943759918212891
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,24,1,128,1,float16,float16,0,0.16543680429458618
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,24,24,128,1,fp8,fp8,0,0.5946640014648438
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,24,1,128,1,float16,fp8,0,0.1770751953125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,24,1,128,1,fp8,fp8,0,0.17737599611282348
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,24,2,128,1,float16,float16,0,0.18278720378875732
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,24,2,128,1,float16,fp8,0,0.1974400043487549
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,24,2,128,1,fp8,fp8,0,0.1965791940689087
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,24,8,128,1,float16,fp8,0,0.30640320777893065
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,24,4,128,1,float16,float16,0,0.21863040924072266
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,24,4,128,1,float16,fp8,0,0.23239359855651856
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,24,4,128,1,fp8,fp8,0,0.2311552047729492
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,24,8,128,1,float16,float16,0,0.2915263891220093
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,24,8,128,1,fp8,fp8,0,0.3056879997253418
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,24,24,128,1,float16,float16,0,0.31182401180267333
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,24,24,128,1,float16,fp8,0,0.3060431957244873
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,24,24,128,1,fp8,fp8,0,0.30668480396270753
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,24,1,128,1,float16,float16,0,0.09738079905509948
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,24,1,128,1,float16,fp8,0,0.10114560127258301
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,24,1,128,1,fp8,fp8,0,0.10107840299606323
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,24,2,128,1,float16,float16,0,0.10580480098724365
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,24,2,128,1,float16,fp8,0,0.10895040035247802
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,24,2,128,1,fp8,fp8,0,0.1093135952949524
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,24,4,128,1,float16,float16,0,0.12225120067596436
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,24,4,128,1,float16,fp8,0,0.12692960500717163
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,24,4,128,1,fp8,fp8,0,0.1274127960205078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,24,8,128,1,float16,float16,0,0.1597599983215332
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,24,8,128,1,float16,fp8,0,0.16288479566574096
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,24,8,128,1,fp8,fp8,0,0.16306719779968262
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,24,24,128,1,float16,float16,0,0.16741600036621093
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,24,24,128,1,float16,fp8,0,0.1621392011642456
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,24,24,128,1,fp8,fp8,0,0.1618783950805664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,24,1,128,1,float16,float16,0,0.05782719850540161
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,24,1,128,1,float16,fp8,0,0.05596479773521423
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,24,1,128,1,fp8,fp8,0,0.055366402864456175
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,24,2,128,1,float16,float16,0,0.06361920237541199
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,24,2,128,1,float16,fp8,0,0.062457597255706786
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,24,2,128,1,fp8,fp8,0,0.061919999122619626
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,24,4,128,1,float16,float16,0,0.07237600088119507
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,24,4,128,1,float16,fp8,0,0.07133280038833618
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,24,4,128,1,fp8,fp8,0,0.07174720168113709
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,24,8,128,1,float16,float16,0,0.09108319878578186
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,24,8,128,1,float16,fp8,0,0.09063360095024109
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,24,8,128,1,fp8,fp8,0,0.09109119772911071
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,24,24,128,1,float16,float16,0,0.09273920059204102
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,24,24,128,1,float16,fp8,0,0.08577119708061218
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,24,24,128,1,fp8,fp8,0,0.08531039953231812
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,24,1,128,1,float16,float16,0,0.03354080021381378
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,24,1,128,1,float16,fp8,0,0.0345551997423172
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,24,1,128,1,fp8,fp8,0,0.034462401270866395
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,24,2,128,1,float16,float16,0,0.03431200087070465
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,24,2,128,1,float16,fp8,0,0.03531520068645477
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,24,2,128,1,fp8,fp8,0,0.03547680079936981
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,24,4,128,1,float16,float16,0,0.03954559862613678
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,24,8,128,1,fp8,fp8,0,0.047163200378417966
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,24,4,128,1,float16,fp8,0,0.04052959978580475
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,24,4,128,1,fp8,fp8,0,0.039904001355171206
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,24,8,128,1,float16,float16,0,0.05118719935417175
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,24,8,128,1,float16,fp8,0,0.047860801219940186
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,24,24,128,1,float16,float16,0,0.05092160105705261
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,24,24,128,1,float16,fp8,0,0.04896320104598999
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,24,24,128,1,fp8,fp8,0,0.04916960000991821
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,24,1,128,1,float16,float16,0,0.027291199564933775
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,24,1,128,1,float16,fp8,0,0.028700798749923706
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,24,1,128,1,fp8,fp8,0,0.028729599714279175
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,24,2,128,1,float16,float16,0,0.02773759961128235
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,24,2,128,1,float16,fp8,0,0.028911998867988585
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,24,2,128,1,fp8,fp8,0,0.02885119915008545
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,24,4,128,1,float16,float16,0,0.02855199873447418
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,24,4,128,1,float16,fp8,0,0.02980799973011017
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,24,4,128,1,fp8,fp8,0,0.02994079887866974
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,24,8,128,1,float16,float16,0,0.03338559865951538
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,24,8,128,1,float16,fp8,0,0.03446879982948303
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,24,8,128,1,fp8,fp8,0,0.034164801239967346
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,24,24,128,1,float16,float16,0,0.032601600885391234
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,24,24,128,1,float16,fp8,0,0.03296160101890564
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,24,24,128,1,fp8,fp8,0,0.032864001393318173
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,24,1,128,1,float16,float16,0,0.02269279956817627
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,24,1,128,1,float16,fp8,0,0.023582400381565095
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,24,1,128,1,fp8,fp8,0,0.023887999355793
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,24,2,128,1,float16,float16,0,0.0228752002120018
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,24,2,128,1,float16,fp8,0,0.023668800294399262
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,24,2,128,1,fp8,fp8,0,0.023464000225067137
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,24,4,128,1,float16,float16,0,0.023444800078868865
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,24,4,128,1,float16,fp8,0,0.023375999927520753
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,24,4,128,1,fp8,fp8,0,0.024057599902153014
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,24,8,128,1,float16,float16,0,0.024060800671577454
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,24,8,128,1,float16,fp8,0,0.024872000515460967
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,24,8,128,1,fp8,fp8,0,0.024736000597476958
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,24,24,128,1,float16,float16,0,0.024984000623226164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,24,24,128,1,float16,fp8,0,0.0253711998462677
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,24,24,128,1,fp8,fp8,0,0.02550559937953949
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,24,1,128,1,float16,float16,0,0.021222400665283202
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,24,1,128,1,float16,fp8,0,0.022121599316596983
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,24,1,128,1,fp8,fp8,0,0.02207999974489212
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,24,2,128,1,float16,float16,0,0.021353599429130555
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,24,2,128,1,float16,fp8,0,0.022375999391078948
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,24,2,128,1,fp8,fp8,0,0.02223680019378662
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,24,4,128,1,float16,float16,0,0.021465599536895752
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,24,4,128,1,float16,fp8,0,0.02231519967317581
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,24,4,128,1,fp8,fp8,0,0.022294400632381438
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,24,8,128,1,float16,float16,0,0.021774399280548095
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,24,8,128,1,float16,fp8,0,0.022731199860572815
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,24,8,128,1,fp8,fp8,0,0.022487999498844148
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,24,24,128,1,float16,float16,0,0.021503999829292297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,24,24,128,1,float16,fp8,0,0.022316800057888032
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,24,24,128,1,fp8,fp8,0,0.02197439968585968
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,24,1,128,1,float16,float16,0,0.02024320065975189
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,24,1,128,1,float16,fp8,0,0.021007999777793884
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,24,1,128,1,fp8,fp8,0,0.020895999670028687
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,24,2,128,1,float16,float16,0,0.020496000349521638
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,24,2,128,1,float16,fp8,0,0.020980800688266753
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,24,2,128,1,fp8,fp8,0,0.021158400177955627
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,24,4,128,1,float16,float16,0,0.02091200053691864
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,24,4,128,1,float16,fp8,0,0.021222400665283202
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,24,4,128,1,fp8,fp8,0,0.021238400042057036
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,24,8,128,1,float16,float16,0,0.020630399882793426
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,24,8,128,1,float16,fp8,0,0.021380800008773803
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,24,8,128,1,fp8,fp8,0,0.021590399742126464
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,24,2,128,1,float16,fp8,0,0.19771039485931396
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,24,1,128,1,float16,float16,0,0.16699999570846558
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,24,1,128,1,float16,fp8,0,0.17722560167312623
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,24,1,128,1,fp8,fp8,0,0.17775360345840455
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,24,2,128,1,float16,float16,0,0.1836303949356079
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,24,2,128,1,fp8,fp8,0,0.1966528058052063
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,24,4,128,1,float16,float16,0,0.21871039867401124
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,24,4,128,1,float16,fp8,0,0.23332641124725342
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,24,4,128,1,fp8,fp8,0,0.233404803276062
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,24,8,128,1,float16,float16,0,0.32102720737457274
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,24,24,128,1,fp8,fp8,0,0.35946080684661863
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,24,8,128,1,float16,fp8,0,0.33615200519561766
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,24,8,128,1,fp8,fp8,0,0.3363136053085327
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,24,24,128,1,float16,float16,0,0.35159199237823485
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,24,24,128,1,float16,fp8,0,0.35864479541778566
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,24,1,128,1,float16,float16,0,0.09693440198898315
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,24,1,128,1,float16,fp8,0,0.10177919864654542
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,24,1,128,1,fp8,fp8,0,0.10150079727172852
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,24,2,128,1,float16,float16,0,0.10420800447463989
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,24,2,128,1,float16,fp8,0,0.10963519811630248
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,24,2,128,1,fp8,fp8,0,0.10912799835205078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,24,4,128,1,float16,float16,0,0.1222864031791687
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,24,4,128,1,float16,fp8,0,0.12739039659500123
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,24,4,128,1,fp8,fp8,0,0.12726399898529053
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,24,8,128,1,float16,float16,0,0.17245759963989257
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,24,8,128,1,float16,fp8,0,0.17990399599075318
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,24,8,128,1,fp8,fp8,0,0.17896480560302735
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,24,24,128,1,float16,float16,0,0.1887279987335205
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,24,24,128,1,float16,fp8,0,0.18952640295028686
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,24,24,128,1,fp8,fp8,0,0.1891744017601013
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,24,1,128,1,float16,float16,0,0.058457601070404056
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,24,1,128,1,float16,fp8,0,0.05516639947891235
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,24,1,128,1,fp8,fp8,0,0.05463839769363403
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,24,2,128,1,float16,float16,0,0.06456480026245118
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,24,4,128,1,fp8,fp8,0,0.07141600251197815
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,24,2,128,1,float16,fp8,0,0.062356799840927124
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,24,2,128,1,fp8,fp8,0,0.062324798107147215
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,24,8,128,1,fp8,fp8,0,0.09998720288276672
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,24,4,128,1,float16,float16,0,0.07281919717788696
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,24,4,128,1,float16,fp8,0,0.07166240215301514
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,24,8,128,1,float16,float16,0,0.09942240118980408
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,24,8,128,1,float16,fp8,0,0.10002720355987549
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,24,24,128,1,float16,float16,0,0.10491520166397095
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,24,24,128,1,float16,fp8,0,0.09899359941482544
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,24,24,128,1,fp8,fp8,0,0.0995248019695282
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,24,1,128,1,float16,float16,0,0.032876798510551454
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,24,1,128,1,float16,fp8,0,0.03456799983978272
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,24,1,128,1,fp8,fp8,0,0.034564799070358275
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,24,2,128,1,float16,float16,0,0.034078401327133176
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,24,8,128,1,float16,float16,0,0.0544543981552124
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,24,2,128,1,float16,fp8,0,0.035836800932884216
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,24,2,128,1,fp8,fp8,0,0.03558720052242279
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,24,4,128,1,float16,float16,0,0.03984479904174805
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,24,4,128,1,float16,fp8,0,0.040191999077796935
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,24,4,128,1,fp8,fp8,0,0.04005280137062073
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,24,8,128,1,float16,fp8,0,0.05228000283241272
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,24,8,128,1,fp8,fp8,0,0.05229600071907044
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,24,24,128,1,float16,float16,0,0.054153597354888915
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,24,24,128,1,float16,fp8,0,0.05336800217628479
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,24,24,128,1,fp8,fp8,0,0.05321599841117859
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,24,1,128,1,float16,float16,0,0.027113598585128785
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,24,1,128,1,float16,fp8,0,0.028574401140213014
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,24,1,128,1,fp8,fp8,0,0.028566399216651918
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,24,2,128,1,float16,float16,0,0.027428799867630006
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,24,2,128,1,float16,fp8,0,0.028790399432182312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,24,2,128,1,fp8,fp8,0,0.028995200991630554
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,24,4,128,1,float16,float16,0,0.02850080132484436
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,24,4,128,1,float16,fp8,0,0.0298224002122879
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,24,4,128,1,fp8,fp8,0,0.02988319993019104
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,24,8,128,1,float16,float16,0,0.032576000690460204
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,24,8,128,1,float16,fp8,0,0.0340144008398056
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,24,8,128,1,fp8,fp8,0,0.03411040008068085
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,24,24,128,1,float16,float16,0,0.03586559891700745
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,24,24,128,1,float16,fp8,0,0.03716320097446442
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,24,24,128,1,fp8,fp8,0,0.03740000128746033
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,24,1,128,1,float16,float16,0,0.022651199996471406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,24,1,128,1,float16,fp8,0,0.023707200586795808
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,24,1,128,1,fp8,fp8,0,0.023683199286460878
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,24,2,128,1,float16,float16,0,0.0225040003657341
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,24,2,128,1,float16,fp8,0,0.023657600581645965
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,24,2,128,1,fp8,fp8,0,0.023827199637889863
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,24,4,128,1,float16,float16,0,0.02332320064306259
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,24,4,128,1,float16,fp8,0,0.023932799696922302
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,24,4,128,1,fp8,fp8,0,0.023795199394226075
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,24,8,128,1,float16,float16,0,0.023940800130367278
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,24,8,128,1,float16,fp8,0,0.024750399589538574
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,24,8,128,1,fp8,fp8,0,0.024383999407291412
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,24,24,128,1,float16,float16,0,0.02465279996395111
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,24,24,128,1,float16,fp8,0,0.025489598512649536
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,24,24,128,1,fp8,fp8,0,0.025313600897789
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,24,1,128,1,float16,float16,0,0.02131039947271347
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,24,1,128,1,float16,fp8,0,0.02202879935503006
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,24,1,128,1,fp8,fp8,0,0.021905599534511565
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,24,2,128,1,float16,float16,0,0.021408000588417055
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,24,2,128,1,float16,fp8,0,0.0220768004655838
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,24,2,128,1,fp8,fp8,0,0.022441600263118745
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,24,4,128,1,float16,float16,0,0.021454399824142455
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,24,4,128,1,float16,fp8,0,0.022201600670814513
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,24,4,128,1,fp8,fp8,0,0.022492800652980805
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,24,8,128,1,float16,float16,0,0.0216511994600296
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,24,8,128,1,float16,fp8,0,0.022460800409317017
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,24,8,128,1,fp8,fp8,0,0.022227199375629426
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,24,24,128,1,float16,float16,0,0.021161599457263945
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,24,24,128,1,float16,fp8,0,0.022006399929523468
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,24,24,128,1,fp8,fp8,0,0.021883200109004974
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,24,1,128,1,float16,float16,0,0.02020000070333481
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,24,1,128,1,float16,fp8,0,0.02093600034713745
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,24,1,128,1,fp8,fp8,0,0.0212351992726326
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,24,2,128,1,float16,float16,0,0.02006720006465912
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,24,2,128,1,float16,fp8,0,0.020926399528980254
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,24,2,128,1,fp8,fp8,0,0.021267199516296388
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,24,4,128,1,float16,float16,0,0.020555199682712556
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,24,24,128,1,float16,fp8,0,0.02114879935979843
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,24,4,128,1,float16,fp8,0,0.021247999370098115
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,24,4,128,1,fp8,fp8,0,0.021447999775409697
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,24,8,128,1,float16,float16,0,0.020563200116157532
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,24,8,128,1,float16,fp8,0,0.021598400175571443
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,24,8,128,1,fp8,fp8,0,0.021289600431919097
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,24,24,128,1,float16,float16,0,0.020193600654602052
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,24,24,128,1,fp8,fp8,0,0.021060800552368163
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,24,1,128,1,float16,float16,0,0.019993600249290467
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,24,1,128,1,float16,fp8,0,0.020500800013542174
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,24,1,128,1,fp8,fp8,0,0.02073120027780533
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,24,2,128,1,float16,float16,0,0.01977919936180115
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,24,2,128,1,float16,fp8,0,0.020790399610996248
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,24,2,128,1,fp8,fp8,0,0.020431999862194062
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,24,4,128,1,float16,float16,0,0.01985439956188202
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,24,4,128,1,float16,fp8,0,0.020657600462436677
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,24,4,128,1,fp8,fp8,0,0.020839999616146087
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,24,8,128,1,float16,float16,0,0.01997919976711273
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,24,8,128,1,float16,fp8,0,0.021076799929142
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,24,8,128,1,fp8,fp8,0,0.021080000698566435
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,24,1,128,1,float16,float16,0,0.09635360240936279
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,24,1,128,1,float16,fp8,0,0.10263199806213379
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,24,1,128,1,fp8,fp8,0,0.10257600545883179
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,24,2,128,1,float16,float16,0,0.10501919984817505
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,24,2,128,1,float16,fp8,0,0.11008479595184326
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,24,2,128,1,fp8,fp8,0,0.11040960550308228
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,24,4,128,1,float16,float16,0,0.13817280530929565
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,24,8,128,1,fp8,fp8,0,0.18096319437026978
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,24,4,128,1,float16,fp8,0,0.14501600265502929
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,24,4,128,1,fp8,fp8,0,0.14512159824371337
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,24,8,128,1,float16,float16,0,0.1729151964187622
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,24,8,128,1,float16,fp8,0,0.1805184006690979
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,24,24,128,1,float16,float16,0,0.23445279598236085
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,24,24,128,1,float16,fp8,0,0.24386560916900635
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,24,24,128,1,fp8,fp8,0,0.24485440254211427
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,24,1,128,1,float16,float16,0,0.05902400016784668
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,24,1,128,1,float16,fp8,0,0.055883198976516724
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,24,1,128,1,fp8,fp8,0,0.05652959942817688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,24,2,128,1,float16,float16,0,0.06447200179100036
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,24,2,128,1,float16,fp8,0,0.06328960061073304
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,24,2,128,1,fp8,fp8,0,0.0630944013595581
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,24,4,128,1,float16,float16,0,0.08029599785804749
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,24,4,128,1,float16,fp8,0,0.08143360018730164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,24,4,128,1,fp8,fp8,0,0.08091840147972107
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,24,24,128,1,float16,fp8,0,0.12910879850387574
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,24,8,128,1,float16,float16,0,0.10018399953842164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,24,8,128,1,float16,fp8,0,0.10065280199050904
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,24,1,128,1,float16,fp8,0,0.03496319949626923
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,24,8,128,1,fp8,fp8,0,0.10050400495529174
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,24,24,128,1,float16,float16,0,0.12735359668731688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,24,24,128,1,fp8,fp8,0,0.12892160415649415
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,24,1,128,1,float16,float16,0,0.03300800025463104
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,24,2,128,1,float16,float16,0,0.034248000383377074
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,24,1,128,1,fp8,fp8,0,0.03494080007076263
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,24,2,128,1,float16,fp8,0,0.03606559932231903
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,24,2,128,1,fp8,fp8,0,0.03592639863491058
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,24,4,128,1,float16,float16,0,0.04331679940223694
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,24,4,128,1,float16,fp8,0,0.044940799474716187
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,24,4,128,1,fp8,fp8,0,0.044809600710868834
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,24,8,128,1,float16,float16,0,0.05496960282325745
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,24,8,128,1,float16,fp8,0,0.052313601970672606
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,24,8,128,1,fp8,fp8,0,0.052007997035980226
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,24,24,128,1,float16,float16,0,0.06627839803695679
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,24,24,128,1,float16,fp8,0,0.06719040274620056
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,24,24,128,1,fp8,fp8,0,0.06732800006866455
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,24,1,128,1,float16,float16,0,0.027662399411201476
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,24,1,128,1,float16,fp8,0,0.029003199934959412
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,24,1,128,1,fp8,fp8,0,0.028966400027275085
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,24,2,128,1,float16,float16,0,0.02776640057563782
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,24,2,128,1,float16,fp8,0,0.02925119996070862
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,24,2,128,1,fp8,fp8,0,0.029359999299049377
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,24,4,128,1,float16,float16,0,0.02847839891910553
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,24,4,128,1,float16,fp8,0,0.03014560043811798
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,24,4,128,1,fp8,fp8,0,0.03017440140247345
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,24,8,128,1,float16,float16,0,0.032902398705482484
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,24,8,128,1,float16,fp8,0,0.03424479961395264
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,24,8,128,1,fp8,fp8,0,0.03426400125026703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,24,24,128,1,float16,float16,0,0.03984160125255585
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,24,24,128,1,float16,fp8,0,0.041982400417327884
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,24,24,128,1,fp8,fp8,0,0.041617599129676816
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,24,1,128,1,float16,float16,0,0.022631999850273133
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,24,1,128,1,float16,fp8,0,0.023475199937820435
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,24,1,128,1,fp8,fp8,0,0.0235167995095253
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,24,2,128,1,float16,float16,0,0.022782400250434875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,24,2,128,1,float16,fp8,0,0.02322079986333847
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,24,2,128,1,fp8,fp8,0,0.023788799345493317
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,24,4,128,1,float16,float16,0,0.02295520007610321
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,24,4,128,1,float16,fp8,0,0.02370239943265915
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,24,4,128,1,fp8,fp8,0,0.02372640073299408
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,24,8,128,1,float16,float16,0,0.02362080067396164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,24,8,128,1,float16,fp8,0,0.02462079972028732
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,24,8,128,1,fp8,fp8,0,0.024804799258708952
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,24,24,128,1,float16,float16,0,0.028233599662780762
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,24,24,128,1,float16,fp8,0,0.029766398668289184
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,24,24,128,1,fp8,fp8,0,0.02964319884777069
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,24,1,128,1,float16,float16,0,0.021352000534534454
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,24,1,128,1,float16,fp8,0,0.02211360037326813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,24,1,128,1,fp8,fp8,0,0.02208320051431656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,24,2,128,1,float16,float16,0,0.021452799439430237
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,24,2,128,1,float16,fp8,0,0.022307200729846953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,24,2,128,1,fp8,fp8,0,0.022169600427150726
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,24,4,128,1,float16,float16,0,0.021345600485801697
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,24,4,128,1,float16,fp8,0,0.022393600642681123
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,24,4,128,1,fp8,fp8,0,0.02210559993982315
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,24,8,128,1,float16,float16,0,0.02158239930868149
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,24,8,128,1,float16,fp8,0,0.022516800463199614
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,24,8,128,1,fp8,fp8,0,0.022217600047588347
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,24,24,128,1,float16,float16,0,0.021161599457263945
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,24,24,128,1,float16,fp8,0,0.0217631995677948
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,24,2,128,1,fp8,fp8,0,0.021091200411319733
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,24,24,128,1,fp8,fp8,0,0.022307200729846953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,24,1,128,1,float16,float16,0,0.02003040015697479
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,24,1,128,1,float16,fp8,0,0.02122880071401596
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,24,1,128,1,fp8,fp8,0,0.02091519981622696
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,24,2,128,1,float16,float16,0,0.020579199492931365
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,24,2,128,1,float16,fp8,0,0.021033599972724915
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,24,4,128,1,float16,float16,0,0.020417599380016326
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,24,4,128,1,float16,fp8,0,0.021580800414085388
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,24,4,128,1,fp8,fp8,0,0.021505600214004515
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,24,8,128,1,float16,float16,0,0.020524799823760986
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,24,8,128,1,float16,fp8,0,0.02160159945487976
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,24,8,128,1,fp8,fp8,0,0.021353599429130555
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,24,24,128,1,float16,float16,0,0.020521600544452668
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,24,24,128,1,float16,fp8,0,0.021038399636745454
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,24,24,128,1,fp8,fp8,0,0.021118399500846863
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,24,1,128,1,float16,float16,0,0.019832000136375427
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,24,1,128,1,float16,fp8,0,0.020776000618934632
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,24,1,128,1,fp8,fp8,0,0.0207056000828743
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,24,2,128,1,float16,float16,0,0.019977599382400513
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,24,2,128,1,float16,fp8,0,0.02056639939546585
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,24,2,128,1,fp8,fp8,0,0.020686399936676026
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,24,4,128,1,float16,float16,0,0.020046399533748628
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,24,4,128,1,float16,fp8,0,0.02086720019578934
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,24,4,128,1,fp8,fp8,0,0.020865599811077117
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,24,8,128,1,float16,float16,0,0.020136000216007234
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,24,8,128,1,float16,fp8,0,0.02091359943151474
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,24,8,128,1,fp8,fp8,0,0.02080480009317398
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,24,24,128,1,float16,float16,0,0.02001120001077652
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,24,24,128,1,float16,fp8,0,0.020688000321388244
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,24,24,128,1,fp8,fp8,0,0.02139520049095154
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,24,1,128,1,float16,float16,0,0.01960480064153671
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,24,1,128,1,float16,fp8,0,0.020521600544452668
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,24,1,128,1,fp8,fp8,0,0.02038239985704422
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,24,2,128,1,float16,float16,0,0.01959040015935898
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,24,2,128,1,float16,fp8,0,0.02035840004682541
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,24,2,128,1,fp8,fp8,0,0.020201599597930907
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,24,4,128,1,float16,float16,0,0.01974239945411682
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,24,4,128,1,float16,fp8,0,0.020627200603485107
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,24,4,128,1,fp8,fp8,0,0.020766399800777435
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,24,8,128,1,float16,float16,0,0.019631999731063842
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,24,8,128,1,float16,fp8,0,0.020585599541664123
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,24,8,128,1,fp8,fp8,0,0.020182399451732634
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,24,1,128,1,float16,float16,0,0.05940160155296326
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,24,1,128,1,float16,fp8,0,0.05763999819755554
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,24,4,128,1,float16,fp8,0,0.08327040076255798
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,24,1,128,1,fp8,fp8,0,0.05818560123443604
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,24,2,128,1,float16,float16,0,0.07283999919891357
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,24,2,128,1,fp8,fp8,0,0.07227519750595093
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,24,2,128,1,float16,fp8,0,0.07192959785461425
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,24,4,128,1,float16,float16,0,0.08140799999237061
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,24,4,128,1,fp8,fp8,0,0.08217440247535705
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,24,8,128,1,float16,float16,0,0.1301792025566101
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,24,8,128,1,float16,fp8,0,0.13875679969787597
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,24,8,128,1,fp8,fp8,0,0.1388368010520935
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,24,24,128,1,float16,float16,0,0.17390880584716797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,24,24,128,1,float16,fp8,0,0.18658239841461183
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,24,24,128,1,fp8,fp8,0,0.18706400394439698
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,24,1,128,1,float16,float16,0,0.033816000819206236
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,24,1,128,1,float16,fp8,0,0.03599199950695038
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,24,1,128,1,fp8,fp8,0,0.03617759943008423
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,24,2,128,1,float16,float16,0,0.038646399974823
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,24,2,128,1,float16,fp8,0,0.04157919883728027
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,24,2,128,1,fp8,fp8,0,0.041580799221992495
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,24,4,128,1,float16,float16,0,0.04536480009555817
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,24,4,128,1,float16,fp8,0,0.0460640013217926
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,24,4,128,1,fp8,fp8,0,0.04610080122947693
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,24,8,128,1,float16,float16,0,0.06964799761772156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,24,8,128,1,float16,fp8,0,0.07130720019340515
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,24,8,128,1,fp8,fp8,0,0.07124320268630982
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,24,24,128,1,float16,float16,0,0.08933119773864746
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,24,24,128,1,float16,fp8,0,0.09505919814109802
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,24,24,128,1,fp8,fp8,0,0.09549919962882995
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,24,1,128,1,float16,float16,0,0.028044798970222475
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,24,1,128,1,float16,fp8,0,0.029604798555374144
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,24,1,128,1,fp8,fp8,0,0.029502400755882265
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,24,2,128,1,float16,float16,0,0.02800639867782593
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,24,2,128,1,float16,fp8,0,0.02972480058670044
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,24,2,128,1,fp8,fp8,0,0.029872000217437744
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,24,4,128,1,float16,float16,0,0.028889599442481994
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,24,4,128,1,float16,fp8,0,0.030649599432945252
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,24,4,128,1,fp8,fp8,0,0.030700799822807313
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,24,8,128,1,float16,float16,0,0.040513598918914796
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,24,8,128,1,float16,fp8,0,0.04351840019226074
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,24,8,128,1,fp8,fp8,0,0.04364640116691589
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,24,24,128,1,float16,float16,0,0.05122079849243164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,24,24,128,1,float16,fp8,0,0.05605760216712952
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,24,24,128,1,fp8,fp8,0,0.055720001459121704
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,24,1,128,1,float16,float16,0,0.02284960001707077
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,24,1,128,1,float16,fp8,0,0.023825600743293762
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,24,1,128,1,fp8,fp8,0,0.023889599740505217
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,24,2,128,1,float16,float16,0,0.02276960015296936
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,24,2,128,1,float16,fp8,0,0.023873600363731384
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,24,2,128,1,fp8,fp8,0,0.023928000032901763
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,24,4,128,1,float16,float16,0,0.023464000225067137
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,24,4,128,1,float16,fp8,0,0.024040000140666963
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,24,4,128,1,fp8,fp8,0,0.0239424005150795
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,24,8,128,1,float16,float16,0,0.027758398652076723
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,24,8,128,1,float16,fp8,0,0.029499199986457825
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,24,8,128,1,fp8,fp8,0,0.02929440140724182
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,24,24,128,1,float16,float16,0,0.03215680122375488
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,24,24,128,1,float16,fp8,0,0.034246399998664856
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,24,24,128,1,fp8,fp8,0,0.03440479934215546
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,24,1,128,1,float16,float16,0,0.021476800739765167
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,24,1,128,1,float16,fp8,0,0.02221599966287613
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,24,1,128,1,fp8,fp8,0,0.02245279997587204
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,24,2,128,1,float16,float16,0,0.021460799872875212
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,24,2,128,1,float16,fp8,0,0.021960000693798064
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,24,2,128,1,fp8,fp8,0,0.02221119999885559
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,24,4,128,1,float16,float16,0,0.021382400393486024
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,24,4,128,1,float16,fp8,0,0.0220208004117012
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,24,4,128,1,fp8,fp8,0,0.022129599750041962
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,24,8,128,1,float16,float16,0,0.02163680046796799
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,24,8,128,1,float16,fp8,0,0.02259040027856827
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,24,8,128,1,fp8,fp8,0,0.022388799488544463
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,24,24,128,1,float16,float16,0,0.02489439994096756
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,24,24,128,1,float16,fp8,0,0.02643519937992096
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,24,24,128,1,fp8,fp8,0,0.02659359872341156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,24,1,128,1,float16,float16,0,0.020286400616168977
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,24,1,128,1,float16,fp8,0,0.02133920043706894
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,24,1,128,1,fp8,fp8,0,0.021456000208854676
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,24,2,128,1,float16,float16,0,0.020310400426387785
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,24,2,128,1,float16,fp8,0,0.021104000508785248
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,24,2,128,1,fp8,fp8,0,0.021163199841976166
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,24,4,128,1,float16,float16,0,0.02056639939546585
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,24,4,128,1,float16,fp8,0,0.02123199999332428
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,24,4,128,1,fp8,fp8,0,0.02128639966249466
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,24,8,128,1,float16,float16,0,0.020734399557113647
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,24,8,128,1,float16,fp8,0,0.021275199949741364
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,24,8,128,1,fp8,fp8,0,0.021172800660133363
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,24,24,128,1,float16,float16,0,0.020470400154590607
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,24,24,128,1,float16,fp8,0,0.02141920030117035
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,24,24,128,1,fp8,fp8,0,0.021270400285720824
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,24,1,128,1,float16,float16,0,0.01969120055437088
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,24,1,128,1,float16,fp8,0,0.02078240066766739
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,24,1,128,1,fp8,fp8,0,0.020689600706100465
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,24,2,128,1,float16,float16,0,0.019875200092792512
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,24,2,128,1,float16,fp8,0,0.02044160068035126
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,24,2,128,1,fp8,fp8,0,0.02083200067281723
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,24,4,128,1,float16,float16,0,0.01995519995689392
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,24,4,128,1,float16,fp8,0,0.02083680033683777
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,24,4,128,1,fp8,fp8,0,0.02075680047273636
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,24,24,128,1,fp8,fp8,0,0.02141599953174591
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,24,8,128,1,float16,float16,0,0.020292800664901734
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,24,8,128,1,float16,fp8,0,0.02084160000085831
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,24,8,128,1,fp8,fp8,0,0.020846399664878845
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,24,24,128,1,float16,float16,0,0.020028799772262573
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,24,24,128,1,float16,fp8,0,0.021505600214004515
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,24,1,128,1,float16,float16,0,0.019551999866962433
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,24,1,128,1,float16,fp8,0,0.02048960030078888
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,24,1,128,1,fp8,fp8,0,0.020238399505615234
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,24,2,128,1,float16,float16,0,0.019628800451755524
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,24,2,128,1,float16,fp8,0,0.020292800664901734
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,24,2,128,1,fp8,fp8,0,0.02009759992361069
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,24,4,128,1,float16,float16,0,0.019363200664520262
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,24,4,128,1,float16,fp8,0,0.020374399423599244
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,24,4,128,1,fp8,fp8,0,0.0203232005238533
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,24,8,128,1,float16,float16,0,0.01977120041847229
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,24,8,128,1,float16,fp8,0,0.020534400641918183
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,24,8,128,1,fp8,fp8,0,0.020556800067424774
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,24,24,128,1,float16,float16,0,0.019860799610614776
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,24,24,128,1,float16,fp8,0,0.02067999988794327
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,24,24,128,1,fp8,fp8,0,0.020686399936676026
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,24,1,128,1,float16,float16,0,0.019385600090026857
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,24,1,128,1,float16,fp8,0,0.020497600734233856
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,24,1,128,1,fp8,fp8,0,0.02030239999294281
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,24,2,128,1,float16,float16,0,0.019529600441455842
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,24,2,128,1,float16,fp8,0,0.020535999536514284
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,24,2,128,1,fp8,fp8,0,0.02006720006465912
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,24,4,128,1,float16,float16,0,0.019435200095176696
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,24,4,128,1,float16,fp8,0,0.020150400698184967
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,24,4,128,1,fp8,fp8,0,0.020377600193023683
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,24,8,128,1,float16,float16,0,0.01966879963874817
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,24,8,128,1,float16,fp8,0,0.02038560062646866
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,24,8,128,1,fp8,fp8,0,0.020371200144290925
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,24,1,128,1,float16,float16,0,0.029867199063301087
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,24,1,128,1,float16,fp8,0,0.03112959861755371
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,24,1,128,1,fp8,fp8,0,0.0313647985458374
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,24,2,128,1,float16,float16,0,0.03707999885082245
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,24,2,128,1,float16,fp8,0,0.04018239974975586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,24,2,128,1,fp8,fp8,0,0.040171200037002565
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,24,4,128,1,float16,float16,0,0.05196800231933594
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,24,4,128,1,float16,fp8,0,0.05817279815673828
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,24,4,128,1,fp8,fp8,0,0.05803999900817871
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,24,8,128,1,float16,float16,0,0.07995359897613526
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,24,8,128,1,float16,fp8,0,0.09376800060272217
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,24,8,128,1,fp8,fp8,0,0.09414880275726319
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,24,24,128,1,float16,float16,0,0.10751359462738037
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,24,24,128,1,float16,fp8,0,0.12854559421539308
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,24,24,128,1,fp8,fp8,0,0.1292143940925598
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,24,1,128,1,float16,float16,0,0.022759999334812164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,24,1,128,1,float16,fp8,0,0.023742400109767914
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,24,1,128,1,fp8,fp8,0,0.02359199970960617
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,24,2,128,1,float16,float16,0,0.02654080092906952
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,24,2,128,1,float16,fp8,0,0.028123199939727783
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,24,2,128,1,fp8,fp8,0,0.028329598903656005
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,24,4,128,1,float16,float16,0,0.03421759903430939
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,24,4,128,1,float16,fp8,0,0.037448000907897946
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,24,4,128,1,fp8,fp8,0,0.03744319975376129
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,24,8,128,1,float16,float16,0,0.04868960082530975
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,24,8,128,1,float16,fp8,0,0.05477439761161804
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,24,8,128,1,fp8,fp8,0,0.054979199171066286
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,24,24,128,1,float16,float16,0,0.06292960047721863
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,24,24,128,1,float16,fp8,0,0.07265920042991639
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,24,24,128,1,fp8,fp8,0,0.07249919772148132
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,24,1,128,1,float16,float16,0,0.02136639952659607
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,24,1,128,1,float16,fp8,0,0.022008000314235686
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,24,1,128,1,fp8,fp8,0,0.022073599696159362
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,24,2,128,1,float16,float16,0,0.021272000670433045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,24,2,128,1,float16,fp8,0,0.022249600291252135
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,24,2,128,1,fp8,fp8,0,0.0225615993142128
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,24,4,128,1,float16,float16,0,0.025363200902938844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,24,4,128,1,float16,fp8,0,0.026943999528884887
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,24,4,128,1,fp8,fp8,0,0.02687999904155731
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,24,8,128,1,float16,float16,0,0.03299840092658997
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,24,8,128,1,float16,fp8,0,0.03588480055332184
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,24,8,128,1,fp8,fp8,0,0.03577440083026886
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,24,1,128,1,float16,fp8,0,0.020931200683116914
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,24,24,128,1,float16,float16,0,0.03986240029335022
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,24,24,128,1,float16,fp8,0,0.04470239877700806
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,24,24,128,1,fp8,fp8,0,0.044736000895500186
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,24,1,128,1,float16,float16,0,0.019995200634002685
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,24,1,128,1,fp8,fp8,0,0.02096959948539734
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,24,2,128,1,float16,float16,0,0.019991999864578246
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,24,2,128,1,float16,fp8,0,0.021433599293231964
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,24,2,128,1,fp8,fp8,0,0.021115200221538545
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,24,4,128,1,float16,float16,0,0.02078080028295517
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,24,4,128,1,float16,fp8,0,0.02152799963951111
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,24,4,128,1,fp8,fp8,0,0.02168480008840561
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,24,8,128,1,float16,float16,0,0.02434239983558655
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,24,8,128,1,float16,fp8,0,0.02595199942588806
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,24,8,128,1,fp8,fp8,0,0.02629440128803253
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,24,24,128,1,float16,float16,0,0.028071999549865723
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,24,24,128,1,float16,fp8,0,0.030542400479316712
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,24,24,128,1,fp8,fp8,0,0.030417600274086
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,24,1,128,1,float16,float16,0,0.019472000002861024
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,24,1,128,1,float16,fp8,0,0.020182399451732634
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,24,1,128,1,fp8,fp8,0,0.020136000216007234
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,24,2,128,1,float16,float16,0,0.019763199985027312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,24,2,128,1,float16,fp8,0,0.020528000593185425
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,24,2,128,1,fp8,fp8,0,0.020766399800777435
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,24,4,128,1,float16,float16,0,0.01982239931821823
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,24,4,128,1,float16,fp8,0,0.020791999995708466
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,24,4,128,1,fp8,fp8,0,0.020875200629234314
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,24,8,128,1,float16,float16,0,0.02028000056743622
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,24,8,128,1,float16,fp8,0,0.020852799713611602
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,24,8,128,1,fp8,fp8,0,0.021160000562667848
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,24,24,128,1,float16,float16,0,0.0238864004611969
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,24,24,128,1,float16,fp8,0,0.025361600518226623
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,24,24,128,1,fp8,fp8,0,0.02540160119533539
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,24,1,128,1,float16,float16,0,0.019171200692653656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,24,1,128,1,float16,fp8,0,0.019952000677585603
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,24,1,128,1,fp8,fp8,0,0.019673599302768706
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,24,2,128,1,float16,float16,0,0.019289599359035493
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,24,2,128,1,float16,fp8,0,0.02019679993391037
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,24,2,128,1,fp8,fp8,0,0.020150400698184967
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,24,4,128,1,float16,float16,0,0.019622400403022766
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,24,4,128,1,float16,fp8,0,0.020615999400615693
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,24,4,128,1,fp8,fp8,0,0.020295999944210052
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,24,8,128,1,float16,float16,0,0.019784000515937806
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,24,8,128,1,float16,fp8,0,0.0204927995800972
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,24,8,128,1,fp8,fp8,0,0.020499199628829956
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,24,24,128,1,float16,float16,0,0.01976960003376007
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,24,24,128,1,float16,fp8,0,0.02077440023422241
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,24,24,128,1,fp8,fp8,0,0.020744000375270844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,24,1,128,1,float16,float16,0,0.019047999382019044
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,24,1,128,1,float16,fp8,0,0.019819200038909912
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,24,1,128,1,fp8,fp8,0,0.020068800449371337
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,24,2,128,1,float16,float16,0,0.019043199717998505
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,24,2,128,1,float16,fp8,0,0.01971199959516525
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,24,2,128,1,fp8,fp8,0,0.01997919976711273
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,24,4,128,1,float16,float16,0,0.019385600090026857
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,24,4,128,1,float16,fp8,0,0.019996799528598785
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,24,4,128,1,fp8,fp8,0,0.020252799987792967
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,24,8,128,1,float16,float16,0,0.01940480023622513
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,24,8,128,1,float16,fp8,0,0.020478400588035583
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,24,8,128,1,fp8,fp8,0,0.0200655996799469
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,24,24,128,1,float16,float16,0,0.019355200231075287
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,24,24,128,1,float16,fp8,0,0.020584000647068022
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,24,24,128,1,fp8,fp8,0,0.02045599967241287
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,24,1,128,1,float16,float16,0,0.017308799922466277
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,24,1,128,1,float16,fp8,0,0.017827199399471284
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,24,1,128,1,fp8,fp8,0,0.01775840073823929
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,24,2,128,1,float16,float16,0,0.018849599361419677
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,24,2,128,1,float16,fp8,0,0.01961439996957779
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,24,2,128,1,fp8,fp8,0,0.019486400485038757
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,24,4,128,1,float16,float16,0,0.019167999923229217
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,24,4,128,1,float16,fp8,0,0.01977279931306839
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,24,4,128,1,fp8,fp8,0,0.01979839950799942
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,24,8,128,1,float16,float16,0,0.019208000600337984
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,24,8,128,1,float16,fp8,0,0.019857600331306458
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,24,8,128,1,fp8,fp8,0,0.02025119960308075
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,24,24,128,1,float16,float16,0,0.019072000682353974
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,24,24,128,1,float16,fp8,0,0.019843199849128725
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,24,24,128,1,fp8,fp8,0,0.02028159946203232
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,24,1,128,1,float16,float16,0,0.016177600622177123
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,24,1,128,1,float16,fp8,0,0.017129600048065186
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,24,1,128,1,fp8,fp8,0,0.016982400417327882
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,24,2,128,1,float16,float16,0,0.017190399765968322
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,24,2,128,1,float16,fp8,0,0.017905600368976593
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,24,2,128,1,fp8,fp8,0,0.01775840073823929
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,24,4,128,1,float16,float16,0,0.01863040030002594
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,24,4,128,1,float16,fp8,0,0.019707199931144715
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,24,4,128,1,fp8,fp8,0,0.019332799315452575
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,24,8,128,1,float16,float16,0,0.01896799951791763
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,24,8,128,1,float16,fp8,0,0.01966720074415207
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,24,8,128,1,fp8,fp8,0,0.019441600143909454
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,16,1,128,1,fp8,fp8,0,12.702537536621094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,16,1,128,1,float16,fp8,0,12.982583618164062
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,16,2,128,1,float16,fp8,0,13.011944580078126
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,16,2,128,1,fp8,fp8,0,12.850125122070313
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,16,1,128,1,float16,float16,0,15.264743041992187
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,16,2,128,1,float16,float16,0,15.421824645996093
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,16,4,128,1,float16,float16,0,15.709368896484374
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,16,4,128,1,float16,fp8,0,13.240664672851562
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,16,16,128,1,float16,float16,0,8.59868927001953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,16,16,128,1,float16,fp8,0,7.408604431152344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,16,16,128,1,fp8,fp8,0,7.490620422363281
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,16,4,128,1,fp8,fp8,0,13.162739562988282
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,16,8,128,1,float16,fp8,0,13.837141418457032
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,16,8,128,1,fp8,fp8,0,13.667628479003906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,16,1,128,1,float16,float16,0,7.637120056152344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,16,8,128,1,float16,float16,0,16.40556182861328
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,16,1,128,1,float16,fp8,0,6.400682830810547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,16,1,128,1,fp8,fp8,0,6.401660919189453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,16,2,128,1,float16,float16,0,7.5558929443359375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,16,2,128,1,float16,fp8,0,6.507044982910156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,16,2,128,1,fp8,fp8,0,6.499393463134766
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,16,4,128,1,float16,fp8,0,6.737721252441406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,16,4,128,1,float16,float16,0,7.871562957763672
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,16,4,128,1,fp8,fp8,0,6.76060791015625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,16,8,128,1,float16,fp8,0,6.939755249023437
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,16,16,128,1,float16,float16,0,4.570284652709961
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,16,8,128,1,float16,float16,0,8.58379669189453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,16,16,128,1,float16,fp8,0,3.7775169372558595
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,16,8,128,1,fp8,fp8,0,7.243857574462891
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,16,16,128,1,fp8,fp8,0,3.7205902099609376
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,16,1,128,1,float16,float16,0,3.8342208862304688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,16,1,128,1,float16,fp8,0,3.21319694519043
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,16,1,128,1,fp8,fp8,0,3.1899040222167967
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,16,2,128,1,float16,float16,0,3.8767631530761717
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,16,2,128,1,float16,fp8,0,3.3634143829345704
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,16,2,128,1,fp8,fp8,0,3.2636367797851564
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,16,4,128,1,float16,fp8,0,3.3416881561279297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,16,4,128,1,float16,float16,0,3.836991882324219
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,16,4,128,1,fp8,fp8,0,3.3890270233154296
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,16,8,128,1,float16,float16,0,3.992416000366211
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,16,16,128,1,float16,float16,0,2.025172805786133
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,16,8,128,1,float16,fp8,0,3.4672096252441404
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,16,16,128,1,float16,fp8,0,1.886087989807129
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,16,16,128,1,fp8,fp8,0,1.9030143737792968
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,16,8,128,1,fp8,fp8,0,3.539379119873047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,16,1,128,1,float16,float16,0,1.9176959991455078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,16,1,128,1,float16,fp8,0,1.60327205657959
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,16,1,128,1,fp8,fp8,0,1.8527423858642578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,16,2,128,1,float16,fp8,0,1.6203535079956055
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,16,2,128,1,float16,float16,0,1.8139951705932618
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,16,2,128,1,fp8,fp8,0,1.619584083557129
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,16,4,128,1,float16,float16,0,1.831399917602539
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,16,4,128,1,float16,fp8,0,1.6584800720214843
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,16,4,128,1,fp8,fp8,0,1.7215263366699218
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,16,8,128,1,float16,float16,0,1.8733583450317384
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,16,8,128,1,float16,fp8,0,1.7596303939819335
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,16,8,128,1,fp8,fp8,0,1.7284511566162108
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,16,1,128,1,float16,fp8,0,7.344321441650391
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,16,1,128,1,fp8,fp8,0,7.340128326416016
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,16,2,128,1,float16,fp8,0,7.470780944824218
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,16,1,128,1,float16,float16,0,8.580313873291015
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,16,2,128,1,fp8,fp8,0,7.396475219726563
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,16,2,128,1,float16,float16,0,8.808393859863282
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,16,4,128,1,float16,fp8,0,7.59373779296875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,16,4,128,1,float16,float16,0,9.063201904296875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,16,16,128,1,float16,float16,0,5.103171157836914
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,16,16,128,1,fp8,fp8,0,4.441798400878906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,16,16,128,1,float16,fp8,0,4.498030471801758
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,16,1,128,1,float16,float16,0,4.332030487060547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,16,4,128,1,fp8,fp8,0,7.678291320800781
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,16,8,128,1,float16,fp8,0,8.090447998046875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,16,8,128,1,fp8,fp8,0,8.068593597412109
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,16,8,128,1,float16,float16,0,9.510489654541015
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,16,1,128,1,fp8,fp8,0,3.632854461669922
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,16,1,128,1,float16,fp8,0,3.7986560821533204
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,16,2,128,1,float16,float16,0,4.274339294433593
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,16,2,128,1,float16,fp8,0,3.8801601409912108
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,16,2,128,1,fp8,fp8,0,3.7032958984375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,16,4,128,1,float16,fp8,0,3.8001937866210938
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,16,4,128,1,float16,float16,0,4.384120178222656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,16,4,128,1,fp8,fp8,0,3.867287826538086
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,16,16,128,1,float16,fp8,0,2.2350303649902346
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,16,16,128,1,float16,float16,0,2.5175712585449217
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,16,8,128,1,float16,fp8,0,4.016572952270508
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,16,8,128,1,float16,float16,0,4.755875015258789
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,16,16,128,1,fp8,fp8,0,2.2448896408081054
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,16,8,128,1,fp8,fp8,0,4.193857574462891
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,16,1,128,1,float16,float16,0,2.0703535079956055
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,16,1,128,1,float16,fp8,0,1.8326223373413086
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,16,1,128,1,fp8,fp8,0,1.82032470703125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,16,2,128,1,float16,float16,0,2.0398160934448244
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,16,2,128,1,float16,fp8,0,1.8575231552124023
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,16,2,128,1,fp8,fp8,0,1.9699520111083983
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,16,4,128,1,float16,float16,0,2.1093088150024415
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,16,4,128,1,float16,fp8,0,1.912001609802246
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,16,4,128,1,fp8,fp8,0,2.0291311264038088
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,16,16,128,1,float16,float16,0,1.2460176467895507
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,16,8,128,1,float16,fp8,0,2.0066112518310546
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,16,8,128,1,float16,float16,0,2.2317359924316404
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,16,16,128,1,float16,fp8,0,1.1930095672607421
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,16,8,128,1,fp8,fp8,0,2.1624399185180665
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,16,1,128,1,float16,float16,0,0.9912655830383301
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,16,16,128,1,fp8,fp8,0,1.3011808395385742
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,16,1,128,1,float16,fp8,0,0.9713328361511231
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,16,1,128,1,fp8,fp8,0,0.9233280181884765
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,16,2,128,1,float16,float16,0,1.016425609588623
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,16,2,128,1,float16,fp8,0,0.9432016372680664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,16,2,128,1,fp8,fp8,0,0.9519488334655761
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,16,4,128,1,float16,float16,0,1.0349023818969727
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,16,4,128,1,float16,fp8,0,0.9911168098449707
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,16,4,128,1,fp8,fp8,0,0.9865839958190918
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,16,8,128,1,float16,float16,0,1.1164799690246583
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,16,8,128,1,float16,fp8,0,1.0542752265930175
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,16,8,128,1,fp8,fp8,0,1.0249008178710937
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,16,1,128,1,float16,fp8,0,5.165201568603516
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,16,1,128,1,fp8,fp8,0,5.135164642333985
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,16,1,128,1,float16,float16,0,6.109590530395508
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,16,2,128,1,float16,fp8,0,5.248819351196289
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,16,2,128,1,fp8,fp8,0,5.229204940795898
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,16,2,128,1,float16,float16,0,6.135595321655273
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,16,4,128,1,float16,float16,0,6.192529678344727
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,16,4,128,1,float16,fp8,0,5.405303955078125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,16,16,128,1,float16,float16,0,3.6623790740966795
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,16,16,128,1,float16,fp8,0,3.3434894561767576
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,16,16,128,1,fp8,fp8,0,3.345596694946289
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,16,4,128,1,fp8,fp8,0,5.371246337890625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,16,8,128,1,float16,fp8,0,5.814326477050781
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,16,8,128,1,fp8,fp8,0,5.751747131347656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,16,1,128,1,float16,float16,0,2.7779903411865234
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,16,8,128,1,float16,float16,0,6.731201934814453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,16,1,128,1,float16,fp8,0,2.6095279693603515
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,16,1,128,1,fp8,fp8,0,2.5650047302246093
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,16,2,128,1,float16,fp8,0,2.6073808670043945
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,16,2,128,1,float16,float16,0,2.991057586669922
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,16,2,128,1,fp8,fp8,0,2.617755126953125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,16,4,128,1,float16,float16,0,3.088699150085449
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,16,4,128,1,float16,fp8,0,2.8454191207885744
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,16,4,128,1,fp8,fp8,0,2.7622655868530273
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,16,8,128,1,float16,fp8,0,2.8814239501953125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,16,16,128,1,float16,float16,0,1.8617712020874024
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,16,8,128,1,float16,float16,0,3.2858673095703126
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,16,16,128,1,float16,fp8,0,1.6577903747558593
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,16,8,128,1,fp8,fp8,0,2.8778432846069335
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,16,1,128,1,float16,float16,0,1.3762639999389648
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,16,1,128,1,float16,fp8,0,1.2985568046569824
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,16,16,128,1,fp8,fp8,0,1.7772367477416993
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,16,1,128,1,fp8,fp8,0,1.2971887588500977
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,16,2,128,1,float16,float16,0,1.4263775825500489
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,16,2,128,1,fp8,fp8,0,1.3237824440002441
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,16,2,128,1,float16,fp8,0,1.4523903846740722
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,16,4,128,1,float16,float16,0,1.460374355316162
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,16,4,128,1,float16,fp8,0,1.4069408416748046
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,16,4,128,1,fp8,fp8,0,1.3679327964782715
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,16,8,128,1,float16,float16,0,1.5955856323242188
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,16,8,128,1,float16,fp8,0,1.4537391662597656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,16,16,128,1,float16,float16,0,0.927126407623291
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,16,16,128,1,float16,fp8,0,0.8472304344177246
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,16,8,128,1,fp8,fp8,0,1.6247903823852539
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,16,1,128,1,float16,float16,0,0.6993072032928467
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,16,16,128,1,fp8,fp8,0,0.9136832237243653
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,16,1,128,1,float16,fp8,0,0.6637072086334228
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,16,1,128,1,fp8,fp8,0,0.6975984096527099
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,16,4,128,1,float16,float16,0,0.7473199844360352
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,16,2,128,1,float16,float16,0,0.7032671928405761
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,16,2,128,1,float16,fp8,0,0.6740479946136475
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,16,2,128,1,fp8,fp8,0,0.676254415512085
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,16,4,128,1,float16,fp8,0,0.6969456195831298
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,16,4,128,1,fp8,fp8,0,0.6989952087402344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,16,8,128,1,float16,float16,0,0.8049407958984375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,16,8,128,1,float16,fp8,0,0.7562128067016601
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,16,8,128,1,fp8,fp8,0,0.7430799961090088
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,16,1,128,1,float16,fp8,0,6.760539245605469
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,16,1,128,1,fp8,fp8,0,6.696932983398438
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,16,1,128,1,float16,float16,0,7.834391784667969
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,16,2,128,1,float16,fp8,0,6.821683502197265
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,16,2,128,1,fp8,fp8,0,6.8493293762207035
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,16,2,128,1,float16,float16,0,7.986307525634766
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,16,4,128,1,float16,fp8,0,7.079815673828125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,16,4,128,1,float16,float16,0,8.328826904296875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,16,16,128,1,float16,float16,0,4.909543991088867
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,16,16,128,1,float16,fp8,0,4.463647842407227
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,16,16,128,1,fp8,fp8,0,4.411427307128906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,16,1,128,1,float16,float16,0,3.957051086425781
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,16,4,128,1,fp8,fp8,0,7.169761657714844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,16,8,128,1,fp8,fp8,0,7.640411376953125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,16,8,128,1,float16,fp8,0,7.7057746887207035
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,16,8,128,1,float16,float16,0,8.976853179931641
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,16,1,128,1,fp8,fp8,0,3.3337024688720702
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,16,1,128,1,float16,fp8,0,3.400388717651367
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,16,2,128,1,float16,float16,0,3.9720081329345702
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,16,2,128,1,float16,fp8,0,3.5356929779052733
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,16,2,128,1,fp8,fp8,0,3.4466064453125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,16,4,128,1,float16,fp8,0,3.5597488403320314
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,16,4,128,1,float16,float16,0,4.040065765380859
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,16,4,128,1,fp8,fp8,0,3.5870800018310547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,16,16,128,1,float16,float16,0,2.3969999313354493
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,16,16,128,1,float16,fp8,0,2.228334426879883
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,16,8,128,1,float16,fp8,0,3.8380321502685546
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,16,8,128,1,float16,float16,0,4.387795257568359
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,16,8,128,1,fp8,fp8,0,3.904052734375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,16,16,128,1,fp8,fp8,0,2.224496078491211
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,16,1,128,1,float16,float16,0,1.8421360015869142
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,16,1,128,1,float16,fp8,0,1.6806655883789063
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,16,1,128,1,fp8,fp8,0,1.6821887969970704
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,16,2,128,1,float16,float16,0,1.876153564453125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,16,2,128,1,float16,fp8,0,1.7876703262329101
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,16,2,128,1,fp8,fp8,0,1.721614456176758
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,16,4,128,1,float16,float16,0,1.9439775466918945
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,16,4,128,1,float16,fp8,0,1.8294815063476562
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,16,4,128,1,fp8,fp8,0,1.790131187438965
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,16,8,128,1,float16,float16,0,2.1337007522583007
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,16,16,128,1,float16,float16,0,1.1929120063781737
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,16,1,128,1,float16,float16,0,0.9092415809631348
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,16,8,128,1,float16,fp8,0,2.0334320068359375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,16,16,128,1,fp8,fp8,0,1.125699234008789
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,16,16,128,1,float16,fp8,0,1.197811222076416
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,16,8,128,1,fp8,fp8,0,1.9364112854003905
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,16,1,128,1,float16,fp8,0,0.9093888282775879
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,16,1,128,1,fp8,fp8,0,0.8552576065063476
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,16,2,128,1,float16,float16,0,0.9585344314575195
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,16,2,128,1,float16,fp8,0,0.8839136123657226
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,16,2,128,1,fp8,fp8,0,0.8748160362243652
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,16,4,128,1,float16,float16,0,0.957084846496582
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,16,4,128,1,float16,fp8,0,0.9107456207275391
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,16,4,128,1,fp8,fp8,0,0.9227104187011719
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,16,8,128,1,float16,float16,0,1.04345760345459
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,16,8,128,1,float16,fp8,0,0.9978256225585938
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,16,16,128,1,float16,float16,0,0.6050960063934326
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,16,8,128,1,fp8,fp8,0,0.9817279815673828
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,16,16,128,1,float16,fp8,0,0.5858384132385254
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,16,16,128,1,fp8,fp8,0,0.5774479866027832
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,16,1,128,1,float16,float16,0,0.4675727844238281
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,16,1,128,1,float16,fp8,0,0.44671359062194826
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,16,1,128,1,fp8,fp8,0,0.44148960113525393
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,16,2,128,1,float16,float16,0,0.47579197883605956
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,16,2,128,1,float16,fp8,0,0.4528463840484619
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,16,2,128,1,fp8,fp8,0,0.4517024040222168
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,16,4,128,1,float16,float16,0,0.4860703945159912
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,16,4,128,1,float16,fp8,0,0.4714223861694336
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,16,4,128,1,fp8,fp8,0,0.4707280158996582
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,16,8,128,1,float16,float16,0,0.5324384212493897
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,16,8,128,1,float16,fp8,0,0.5041855812072754
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,16,8,128,1,fp8,fp8,0,0.5035039901733398
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,16,1,128,1,float16,fp8,0,3.9055248260498048
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,16,1,128,1,float16,float16,0,4.32795524597168
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,16,1,128,1,fp8,fp8,0,3.876364898681641
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,16,2,128,1,float16,fp8,0,3.9898128509521484
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,16,2,128,1,float16,float16,0,4.520132827758789
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,16,2,128,1,fp8,fp8,0,3.9721424102783205
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,16,4,128,1,float16,fp8,0,4.206494522094727
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,16,4,128,1,float16,float16,0,4.6202239990234375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,16,16,128,1,float16,fp8,0,2.7516447067260743
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,16,16,128,1,float16,float16,0,3.0000015258789063
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,16,4,128,1,fp8,fp8,0,4.196680068969727
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,16,1,128,1,float16,float16,0,2.1374351501464846
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,16,16,128,1,fp8,fp8,0,2.7625280380249024
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,16,8,128,1,float16,fp8,0,4.622727966308593
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,16,8,128,1,float16,float16,0,5.2164161682128904
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,16,8,128,1,fp8,fp8,0,4.593376159667969
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,16,1,128,1,float16,fp8,0,1.9405824661254882
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,16,1,128,1,fp8,fp8,0,1.9961456298828124
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,16,2,128,1,float16,float16,0,2.19532470703125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,16,2,128,1,float16,fp8,0,2.0330047607421875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,16,2,128,1,fp8,fp8,0,2.005142402648926
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,16,4,128,1,float16,float16,0,2.3297216415405275
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,16,4,128,1,float16,fp8,0,2.1624704360961915
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,16,4,128,1,fp8,fp8,0,2.0974895477294924
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,16,16,128,1,float16,float16,0,1.4960847854614259
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,16,16,128,1,float16,fp8,0,1.3968432426452637
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,16,8,128,1,float16,float16,0,2.545599937438965
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,16,16,128,1,fp8,fp8,0,1.481062412261963
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,16,1,128,1,float16,float16,0,1.0436079978942872
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,16,8,128,1,float16,fp8,0,2.327555274963379
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,16,1,128,1,float16,fp8,0,0.9843135833740234
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,16,8,128,1,fp8,fp8,0,2.3875999450683594
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,16,1,128,1,fp8,fp8,0,1.0210960388183594
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,16,2,128,1,float16,float16,0,1.0788800239562988
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,16,2,128,1,float16,fp8,0,1.0606464385986327
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,16,2,128,1,fp8,fp8,0,1.0257935523986816
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,16,4,128,1,float16,float16,0,1.1252655982971191
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,16,4,128,1,float16,fp8,0,1.072003173828125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,16,4,128,1,fp8,fp8,0,1.062654399871826
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,16,8,128,1,float16,float16,0,1.2492015838623047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,16,16,128,1,float16,float16,0,0.776255989074707
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,16,8,128,1,float16,fp8,0,1.1809552192687989
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,16,8,128,1,fp8,fp8,0,1.1781311988830567
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,16,16,128,1,float16,fp8,0,0.7162576198577881
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,16,16,128,1,fp8,fp8,0,0.7308512210845948
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,16,1,128,1,float16,fp8,0,0.507422399520874
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,16,1,128,1,float16,float16,0,0.5321280002593994
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,16,1,128,1,fp8,fp8,0,0.5272880077362061
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,16,2,128,1,float16,float16,0,0.5480944156646729
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,16,2,128,1,float16,fp8,0,0.5236063957214355
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,16,2,128,1,fp8,fp8,0,0.5222000122070313
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,16,4,128,1,float16,fp8,0,0.5493552207946777
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,16,4,128,1,float16,float16,0,0.5696847915649415
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,16,4,128,1,fp8,fp8,0,0.5500175952911377
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,16,8,128,1,fp8,fp8,0,0.602345609664917
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,16,8,128,1,float16,float16,0,0.6370016098022461
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,16,16,128,1,float16,float16,0,0.4070144176483154
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,16,1,128,1,float16,float16,0,0.2777071952819824
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,16,1,128,1,float16,fp8,0,0.26443679332733155
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,16,8,128,1,float16,fp8,0,0.6105023860931397
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,16,16,128,1,float16,fp8,0,0.37271039485931395
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,16,16,128,1,fp8,fp8,0,0.37209439277648926
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,16,1,128,1,fp8,fp8,0,0.2620975971221924
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,16,2,128,1,float16,float16,0,0.2857151985168457
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,16,2,128,1,float16,fp8,0,0.2714639902114868
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,16,2,128,1,fp8,fp8,0,0.27300961017608644
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,16,4,128,1,float16,float16,0,0.3004447937011719
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,16,4,128,1,float16,fp8,0,0.2867759943008423
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,16,4,128,1,fp8,fp8,0,0.28894720077514646
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,16,8,128,1,float16,float16,0,0.33241920471191405
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,16,8,128,1,float16,fp8,0,0.31357760429382325
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,16,8,128,1,fp8,fp8,0,0.31257920265197753
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,16,1,128,1,float16,fp8,0,3.700823974609375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,16,1,128,1,float16,float16,0,4.158059310913086
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,16,1,128,1,fp8,fp8,0,3.673737716674805
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,16,2,128,1,float16,fp8,0,3.785478210449219
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,16,2,128,1,float16,float16,0,4.2583873748779295
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,16,2,128,1,fp8,fp8,0,3.815496063232422
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,16,4,128,1,float16,float16,0,4.500668716430664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,16,4,128,1,float16,fp8,0,4.08653450012207
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,16,16,128,1,float16,float16,0,3.0935951232910157
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,16,4,128,1,fp8,fp8,0,4.145318222045899
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,16,16,128,1,float16,fp8,0,3.0059072494506838
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,16,16,128,1,fp8,fp8,0,2.919950485229492
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,16,8,128,1,float16,fp8,0,4.645982360839843
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,16,8,128,1,float16,float16,0,5.15275993347168
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,16,8,128,1,fp8,fp8,0,4.731622314453125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,16,1,128,1,float16,float16,0,1.9440576553344726
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,16,1,128,1,float16,fp8,0,1.8591695785522462
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,16,1,128,1,fp8,fp8,0,1.8418207168579102
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,16,2,128,1,float16,float16,0,2.095979118347168
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,16,2,128,1,float16,fp8,0,1.9085296630859374
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,16,2,128,1,fp8,fp8,0,1.9066368103027345
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,16,4,128,1,float16,float16,0,2.246187210083008
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,16,4,128,1,float16,fp8,0,2.0623151779174806
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,16,4,128,1,fp8,fp8,0,2.1186351776123047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,16,16,128,1,float16,float16,0,1.5400927543640137
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,16,8,128,1,float16,float16,0,2.5150783538818358
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,16,16,128,1,float16,fp8,0,1.4706624031066895
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,16,1,128,1,float16,float16,0,0.9854031562805176
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,16,8,128,1,float16,fp8,0,2.3494319915771484
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,16,8,128,1,fp8,fp8,0,2.4364992141723634
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,16,1,128,1,float16,fp8,0,0.9400431632995605
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,16,16,128,1,fp8,fp8,0,1.552025604248047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,16,1,128,1,fp8,fp8,0,0.9365728378295899
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,16,2,128,1,float16,float16,0,1.0411616325378419
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,16,2,128,1,float16,fp8,0,0.9688847541809082
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,16,2,128,1,fp8,fp8,0,1.00590238571167
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,16,4,128,1,float16,float16,0,1.104100799560547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,16,4,128,1,float16,fp8,0,1.044585609436035
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,16,4,128,1,fp8,fp8,0,1.0456591606140138
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,16,8,128,1,float16,float16,0,1.2564736366271974
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,16,8,128,1,float16,fp8,0,1.1854111671447753
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,16,16,128,1,float16,float16,0,0.7847455978393555
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,16,16,128,1,float16,fp8,0,0.753272008895874
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,16,8,128,1,fp8,fp8,0,1.2138239860534668
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,16,1,128,1,float16,float16,0,0.5025599956512451
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,16,16,128,1,fp8,fp8,0,0.759441614151001
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,16,1,128,1,float16,fp8,0,0.4848832130432129
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,16,1,128,1,fp8,fp8,0,0.5076191902160645
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,16,2,128,1,float16,float16,0,0.5150063991546631
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,16,2,128,1,float16,fp8,0,0.49916958808898926
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,16,2,128,1,fp8,fp8,0,0.5014383792877197
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,16,4,128,1,float16,float16,0,0.5609888076782227
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,16,4,128,1,float16,fp8,0,0.5367472171783447
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,16,4,128,1,fp8,fp8,0,0.5356639862060547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,16,8,128,1,float16,float16,0,0.6355663776397705
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,16,8,128,1,float16,fp8,0,0.6132319927215576
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,16,8,128,1,fp8,fp8,0,0.6047520160675048
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,16,16,128,1,float16,float16,0,0.4031375885009766
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,16,2,128,1,float16,float16,0,0.2739696025848389
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,16,16,128,1,float16,fp8,0,0.3910559892654419
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,16,16,128,1,fp8,fp8,0,0.38880798816680906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,16,1,128,1,float16,float16,0,0.2640415906906128
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,16,1,128,1,float16,fp8,0,0.2564192056655884
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,16,1,128,1,fp8,fp8,0,0.25809440612792967
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,16,2,128,1,float16,fp8,0,0.2629983901977539
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,16,2,128,1,fp8,fp8,0,0.26323039531707765
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,16,4,128,1,float16,float16,0,0.2931983947753906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,16,4,128,1,float16,fp8,0,0.28481760025024416
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,16,4,128,1,fp8,fp8,0,0.2848527908325195
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,16,8,128,1,float16,float16,0,0.32964320182800294
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,16,8,128,1,float16,fp8,0,0.31732640266418455
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,16,8,128,1,fp8,fp8,0,0.31516480445861816
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,16,16,128,1,float16,float16,0,0.21360960006713867
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,16,16,128,1,float16,fp8,0,0.20423519611358643
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,16,16,128,1,fp8,fp8,0,0.20581440925598143
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,16,1,128,1,float16,float16,0,0.14091999530792237
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,16,1,128,1,float16,fp8,0,0.1328719973564148
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,16,1,128,1,fp8,fp8,0,0.12984319925308227
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,16,2,128,1,float16,float16,0,0.14704960584640503
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,16,2,128,1,float16,fp8,0,0.1350543975830078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,16,2,128,1,fp8,fp8,0,0.13518719673156737
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,16,8,128,1,fp8,fp8,0,0.1681167960166931
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,16,4,128,1,float16,float16,0,0.1573184013366699
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,16,4,128,1,float16,fp8,0,0.14451199769973755
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,16,4,128,1,fp8,fp8,0,0.1441215991973877
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,16,8,128,1,float16,float16,0,0.1772480010986328
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,16,8,128,1,float16,fp8,0,0.16579999923706054
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,16,1,128,1,float16,float16,0,2.314980888366699
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,16,1,128,1,float16,fp8,0,2.191092872619629
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,16,1,128,1,fp8,fp8,0,2.190902328491211
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,16,2,128,1,float16,float16,0,2.4500896453857424
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,16,2,128,1,float16,fp8,0,2.2938720703125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,16,2,128,1,fp8,fp8,0,2.3093759536743166
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,16,4,128,1,float16,float16,0,2.7134576797485352
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,16,4,128,1,float16,fp8,0,2.5761423110961914
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,16,4,128,1,fp8,fp8,0,2.509079933166504
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,16,16,128,1,float16,float16,0,2.0113264083862306
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,16,8,128,1,float16,float16,0,3.130299186706543
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,16,16,128,1,float16,fp8,0,1.9701087951660157
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,16,8,128,1,fp8,fp8,0,2.939675140380859
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,16,8,128,1,float16,fp8,0,3.047662353515625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,16,1,128,1,float16,float16,0,1.1602160453796386
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,16,16,128,1,fp8,fp8,0,1.9400815963745117
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,16,1,128,1,float16,fp8,0,1.1172800064086914
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,16,1,128,1,fp8,fp8,0,1.1084783554077149
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,16,2,128,1,float16,float16,0,1.2301823616027832
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,16,2,128,1,float16,fp8,0,1.1673199653625488
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,16,2,128,1,fp8,fp8,0,1.162844753265381
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,16,4,128,1,float16,float16,0,1.3391183853149413
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,16,4,128,1,float16,fp8,0,1.3173888206481934
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,16,4,128,1,fp8,fp8,0,1.2825535774230956
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,16,8,128,1,float16,fp8,0,1.4846511840820313
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,16,16,128,1,float16,fp8,0,0.9741120338439941
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,16,8,128,1,float16,float16,0,1.5598608016967774
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,16,16,128,1,float16,float16,0,1.0232879638671875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,16,1,128,1,float16,fp8,0,0.5764544010162354
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,16,8,128,1,fp8,fp8,0,1.524392032623291
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,16,1,128,1,float16,float16,0,0.592033576965332
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,16,16,128,1,fp8,fp8,0,0.9856320381164551
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,16,1,128,1,fp8,fp8,0,0.5691184043884278
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,16,2,128,1,float16,float16,0,0.6167344093322754
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,16,2,128,1,float16,fp8,0,0.6163951873779296
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,16,2,128,1,fp8,fp8,0,0.5985263824462891
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,16,4,128,1,float16,float16,0,0.6784175872802735
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,16,4,128,1,float16,fp8,0,0.6508927822113038
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,16,4,128,1,fp8,fp8,0,0.6512288093566895
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,16,8,128,1,float16,float16,0,0.788592004776001
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,16,8,128,1,float16,fp8,0,0.7563136100769043
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,16,8,128,1,fp8,fp8,0,0.751859188079834
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,16,16,128,1,float16,float16,0,0.5312560081481934
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,16,16,128,1,float16,fp8,0,0.5017744064331054
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,16,16,128,1,fp8,fp8,0,0.50348801612854
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,16,1,128,1,float16,float16,0,0.3074671983718872
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,16,1,128,1,float16,fp8,0,0.2991312026977539
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,16,4,128,1,float16,float16,0,0.34937119483947754
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,16,1,128,1,fp8,fp8,0,0.29937760829925536
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,16,2,128,1,float16,float16,0,0.319486403465271
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,16,2,128,1,float16,fp8,0,0.3098560094833374
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,16,2,128,1,fp8,fp8,0,0.3122528076171875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,16,4,128,1,float16,fp8,0,0.3396399974822998
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,16,4,128,1,fp8,fp8,0,0.3405632019042969
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,16,8,128,1,float16,fp8,0,0.3917232036590576
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,16,8,128,1,float16,float16,0,0.40743198394775393
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,16,8,128,1,fp8,fp8,0,0.3919584035873413
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,16,16,128,1,float16,float16,0,0.2781759977340698
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,16,16,128,1,float16,fp8,0,0.26138720512390134
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,16,16,128,1,fp8,fp8,0,0.2627360105514526
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,16,1,128,1,float16,float16,0,0.16319520473480226
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,16,1,128,1,float16,fp8,0,0.1562559962272644
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,16,1,128,1,fp8,fp8,0,0.1557471990585327
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,16,2,128,1,float16,float16,0,0.17241599559783935
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,16,2,128,1,float16,fp8,0,0.16470240354537963
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,16,2,128,1,fp8,fp8,0,0.16449439525604248
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,16,4,128,1,float16,float16,0,0.1855631947517395
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,16,4,128,1,float16,fp8,0,0.17884000539779663
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,16,4,128,1,fp8,fp8,0,0.1804960012435913
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,16,8,128,1,float16,float16,0,0.2164367914199829
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,16,8,128,1,float16,fp8,0,0.20684640407562255
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,16,8,128,1,fp8,fp8,0,0.20920000076293946
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,16,16,128,1,float16,float16,0,0.1536303997039795
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,16,16,128,1,float16,fp8,0,0.14205919504165648
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,16,16,128,1,fp8,fp8,0,0.1418176054954529
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,16,1,128,1,float16,float16,0,0.09280800223350524
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,16,1,128,1,float16,fp8,0,0.08821759819984436
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,16,1,128,1,fp8,fp8,0,0.08814719915390015
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,16,2,128,1,float16,float16,0,0.09268959760665893
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,16,8,128,1,float16,float16,0,0.12070720195770264
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,16,2,128,1,float16,fp8,0,0.09062560200691223
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,16,2,128,1,fp8,fp8,0,0.09121440052986145
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,16,4,128,1,float16,float16,0,0.10295840501785278
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,16,4,128,1,float16,fp8,0,0.09682559967041016
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,16,4,128,1,fp8,fp8,0,0.09778879880905152
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,16,8,128,1,float16,fp8,0,0.10904639959335327
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,16,8,128,1,fp8,fp8,0,0.11139520406723022
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,16,1,128,1,float16,float16,0,2.2464879989624023
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,16,1,128,1,float16,fp8,0,2.184432029724121
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,16,1,128,1,fp8,fp8,0,2.1743392944335938
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,16,2,128,1,float16,float16,0,2.3706127166748048
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,16,2,128,1,float16,fp8,0,2.330049514770508
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,16,2,128,1,fp8,fp8,0,2.3346511840820314
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,16,4,128,1,float16,float16,0,2.753193664550781
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,16,4,128,1,float16,fp8,0,2.598873519897461
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,16,4,128,1,fp8,fp8,0,2.638523292541504
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,16,16,128,1,float16,float16,0,2.2480304718017576
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,16,16,128,1,float16,fp8,0,2.1917375564575194
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,16,8,128,1,float16,float16,0,3.3105567932128905
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,16,8,128,1,float16,fp8,0,3.1791759490966798
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,16,8,128,1,fp8,fp8,0,3.203371047973633
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,16,1,128,1,float16,float16,0,1.1348591804504395
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,16,16,128,1,fp8,fp8,0,2.1811344146728517
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,16,1,128,1,float16,fp8,0,1.1012895584106446
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,16,1,128,1,fp8,fp8,0,1.1115599632263184
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,16,2,128,1,float16,float16,0,1.2505104064941406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,16,2,128,1,float16,fp8,0,1.1773807525634765
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,16,2,128,1,fp8,fp8,0,1.184119987487793
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,16,4,128,1,float16,float16,0,1.3943488121032714
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,16,4,128,1,float16,fp8,0,1.311350440979004
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,16,4,128,1,fp8,fp8,0,1.3124223709106446
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,16,8,128,1,float16,float16,0,1.6567472457885741
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,16,16,128,1,float16,float16,0,1.1415151596069335
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,16,1,128,1,float16,float16,0,0.5789936065673829
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,16,8,128,1,fp8,fp8,0,1.6054416656494142
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,16,8,128,1,float16,fp8,0,1.6498592376708985
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,16,16,128,1,float16,fp8,0,1.1267680168151855
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,16,16,128,1,fp8,fp8,0,1.1053471565246582
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,16,1,128,1,float16,fp8,0,0.5659615993499756
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,16,1,128,1,fp8,fp8,0,0.5706960201263428
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,16,2,128,1,fp8,fp8,0,0.6046656131744385
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,16,2,128,1,float16,float16,0,0.6162303924560547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,16,2,128,1,float16,fp8,0,0.6005055904388428
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,16,4,128,1,float16,float16,0,0.6944896221160889
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,16,4,128,1,float16,fp8,0,0.6686272144317627
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,16,4,128,1,fp8,fp8,0,0.6731904029846192
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,16,8,128,1,float16,float16,0,0.8430064201354981
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,16,16,128,1,float16,float16,0,0.5796351909637452
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,16,8,128,1,float16,fp8,0,0.8187919616699219
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,16,8,128,1,fp8,fp8,0,0.8206080436706543
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,16,16,128,1,float16,fp8,0,0.5664095878601074
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,16,1,128,1,float16,float16,0,0.2999759912490845
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,16,16,128,1,fp8,fp8,0,0.5693984031677246
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,16,1,128,1,float16,fp8,0,0.2975759983062744
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,16,1,128,1,fp8,fp8,0,0.29839200973510743
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,16,2,128,1,float16,float16,0,0.3196223974227905
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,16,2,128,1,float16,fp8,0,0.31487839221954345
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,16,2,128,1,fp8,fp8,0,0.3175407886505127
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,16,4,128,1,float16,float16,0,0.35992319583892823
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,16,4,128,1,float16,fp8,0,0.34729599952697754
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,16,4,128,1,fp8,fp8,0,0.3495232105255127
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,16,8,128,1,float16,float16,0,0.43587360382080076
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,16,1,128,1,float16,float16,0,0.16321120262145997
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,16,8,128,1,float16,fp8,0,0.4221487998962402
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,16,8,128,1,fp8,fp8,0,0.41866240501403806
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,16,16,128,1,float16,float16,0,0.3053344011306763
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,16,2,128,1,float16,fp8,0,0.17104640007019042
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,16,16,128,1,float16,fp8,0,0.29819040298461913
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,16,16,128,1,fp8,fp8,0,0.29565279483795165
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,16,1,128,1,float16,fp8,0,0.16274080276489258
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,16,1,128,1,fp8,fp8,0,0.16155840158462526
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,16,2,128,1,float16,float16,0,0.17189760208129884
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,16,2,128,1,fp8,fp8,0,0.17091679573059082
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,16,4,128,1,float16,float16,0,0.1909824013710022
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,16,4,128,1,float16,fp8,0,0.1892959952354431
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,16,4,128,1,fp8,fp8,0,0.18916319608688353
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,16,8,128,1,float16,float16,0,0.22899200916290283
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,16,8,128,1,float16,fp8,0,0.22519199848175048
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,16,8,128,1,fp8,fp8,0,0.2211616039276123
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,16,16,128,1,float16,float16,0,0.1622496008872986
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,16,16,128,1,float16,fp8,0,0.15931999683380127
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,16,16,128,1,fp8,fp8,0,0.15867840051651
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,16,1,128,1,float16,float16,0,0.08771359920501709
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,16,1,128,1,float16,fp8,0,0.08450080156326294
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,16,1,128,1,fp8,fp8,0,0.08413280248641967
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,16,2,128,1,float16,float16,0,0.09514560103416443
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,16,2,128,1,float16,fp8,0,0.08922240138053894
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,16,8,128,1,float16,fp8,0,0.12000800371170044
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,16,2,128,1,fp8,fp8,0,0.08985279798507691
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,16,4,128,1,float16,float16,0,0.10528320074081421
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,16,4,128,1,float16,fp8,0,0.10073920488357543
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,16,4,128,1,fp8,fp8,0,0.09783359766006469
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,16,8,128,1,float16,float16,0,0.12536959648132323
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,16,8,128,1,fp8,fp8,0,0.11956479549407958
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,16,16,128,1,float16,float16,0,0.09016799926757812
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,16,16,128,1,float16,fp8,0,0.0824400007724762
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,16,16,128,1,fp8,fp8,0,0.08274080157279969
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,16,1,128,1,float16,float16,0,0.05182719826698303
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,16,1,128,1,float16,fp8,0,0.051819199323654176
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,16,1,128,1,fp8,fp8,0,0.05171040296554565
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,16,2,128,1,float16,float16,0,0.05257599949836731
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,16,2,128,1,float16,fp8,0,0.05292479991912842
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,16,2,128,1,fp8,fp8,0,0.05267999768257141
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,16,4,128,1,float16,float16,0,0.057313597202301024
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,16,4,128,1,float16,fp8,0,0.056831997632980344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,16,4,128,1,fp8,fp8,0,0.05730080008506775
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,16,8,128,1,float16,float16,0,0.06572800278663635
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,16,8,128,1,float16,fp8,0,0.06427199840545654
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,16,8,128,1,fp8,fp8,0,0.06445760130882264
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,16,1,128,1,float16,float16,0,1.397118377685547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,16,1,128,1,float16,fp8,0,1.3725104331970215
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,16,1,128,1,fp8,fp8,0,1.3755776405334472
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,16,2,128,1,float16,float16,0,1.4993776321411132
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,16,2,128,1,float16,fp8,0,1.4892319679260253
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,16,2,128,1,fp8,fp8,0,1.4850992202758788
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,16,4,128,1,float16,float16,0,1.7333311080932616
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,16,4,128,1,float16,fp8,0,1.6927391052246095
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,16,4,128,1,fp8,fp8,0,1.7016752243041993
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,16,8,128,1,float16,float16,0,2.1777856826782225
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,16,8,128,1,float16,fp8,0,2.125268745422363
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,16,16,128,1,float16,float16,0,1.541759967803955
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,16,1,128,1,float16,float16,0,0.709219217300415
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,16,8,128,1,fp8,fp8,0,2.1223615646362304
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,16,16,128,1,float16,fp8,0,1.516044807434082
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,16,16,128,1,fp8,fp8,0,1.5162591934204102
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,16,1,128,1,float16,fp8,0,0.7056511878967285
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,16,1,128,1,fp8,fp8,0,0.7016863822937012
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,16,2,128,1,float16,float16,0,0.7640575885772705
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,16,2,128,1,float16,fp8,0,0.7560368061065674
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,16,2,128,1,fp8,fp8,0,0.7542304039001465
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,16,4,128,1,float16,fp8,0,0.8600591659545899
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,16,4,128,1,float16,float16,0,0.8783408164978027
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,16,4,128,1,fp8,fp8,0,0.8636192321777344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,16,8,128,1,float16,float16,0,1.1012703895568847
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,16,8,128,1,float16,fp8,0,1.0780688285827638
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,16,8,128,1,fp8,fp8,0,1.0818976402282714
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,16,16,128,1,float16,float16,0,0.7858096122741699
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,16,16,128,1,float16,fp8,0,0.7715487957000733
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,16,16,128,1,fp8,fp8,0,0.7713103771209717
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,16,1,128,1,float16,float16,0,0.3706975936889648
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,16,1,128,1,float16,fp8,0,0.3628959894180298
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,16,1,128,1,fp8,fp8,0,0.36372320652008056
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,16,2,128,1,float16,float16,0,0.39743680953979493
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,16,2,128,1,float16,fp8,0,0.3890608072280884
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,16,2,128,1,fp8,fp8,0,0.3903775930404663
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,16,4,128,1,float16,float16,0,0.45185279846191406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,16,4,128,1,float16,fp8,0,0.44327521324157715
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,16,4,128,1,fp8,fp8,0,0.4410031795501709
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,16,8,128,1,float16,float16,0,0.5626704216003418
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,16,8,128,1,float16,fp8,0,0.5534351825714111
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,16,16,128,1,float16,fp8,0,0.39957919120788576
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,16,16,128,1,float16,float16,0,0.40611681938171384
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,16,8,128,1,fp8,fp8,0,0.5507359981536866
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,16,16,128,1,fp8,fp8,0,0.39864161014556887
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,16,1,128,1,float16,float16,0,0.19629759788513185
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,16,1,128,1,float16,fp8,0,0.1946239948272705
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,16,1,128,1,fp8,fp8,0,0.19472960233688355
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,16,2,128,1,float16,float16,0,0.21118080615997314
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,16,2,128,1,float16,fp8,0,0.20842719078063965
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,16,2,128,1,fp8,fp8,0,0.20868959426879882
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,16,4,128,1,float16,float16,0,0.237280011177063
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,16,4,128,1,float16,fp8,0,0.2337424039840698
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,16,4,128,1,fp8,fp8,0,0.23370399475097656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,16,8,128,1,float16,float16,0,0.29448800086975097
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,16,8,128,1,float16,fp8,0,0.28960959911346434
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,16,8,128,1,fp8,fp8,0,0.2903824090957642
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,16,16,128,1,float16,float16,0,0.21597440242767335
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,16,16,128,1,float16,fp8,0,0.21261920928955078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,16,16,128,1,fp8,fp8,0,0.21203839778900146
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,16,1,128,1,float16,float16,0,0.10848640203475952
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,16,4,128,1,float16,float16,0,0.13018720149993895
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,16,1,128,1,float16,fp8,0,0.10489280223846435
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,16,1,128,1,fp8,fp8,0,0.10426080226898193
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,16,2,128,1,float16,float16,0,0.11610720157623292
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,16,2,128,1,float16,fp8,0,0.11482720375061035
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,16,2,128,1,fp8,fp8,0,0.11432960033416747
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,16,4,128,1,float16,fp8,0,0.1273408055305481
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,16,4,128,1,fp8,fp8,0,0.127947199344635
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,16,8,128,1,float16,float16,0,0.1587488055229187
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,16,8,128,1,float16,fp8,0,0.15689760446548462
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,16,8,128,1,fp8,fp8,0,0.15722399950027466
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,16,16,128,1,float16,fp8,0,0.11477760076522828
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,16,16,128,1,float16,float16,0,0.11883679628372193
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,16,16,128,1,fp8,fp8,0,0.11492799520492554
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,16,1,128,1,float16,float16,0,0.06036319732666016
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,16,1,128,1,float16,fp8,0,0.060108798742294314
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,16,1,128,1,fp8,fp8,0,0.06035040020942688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,16,2,128,1,float16,float16,0,0.06381120085716248
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,16,2,128,1,float16,fp8,0,0.06285920143127441
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,16,2,128,1,fp8,fp8,0,0.06268799901008607
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,16,4,128,1,float16,float16,0,0.07126399874687195
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,16,4,128,1,float16,fp8,0,0.06901440024375916
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,16,4,128,1,fp8,fp8,0,0.06882719993591309
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,16,8,128,1,float16,float16,0,0.08960480093955994
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,16,8,128,1,float16,fp8,0,0.08357920050621033
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,16,8,128,1,fp8,fp8,0,0.08259680271148681
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,16,16,128,1,float16,float16,0,0.06531999707221985
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,16,16,128,1,float16,fp8,0,0.06149439811706543
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,16,16,128,1,fp8,fp8,0,0.06224160194396973
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,16,1,128,1,float16,float16,0,0.0402319997549057
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,16,1,128,1,float16,fp8,0,0.04058560132980347
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,16,1,128,1,fp8,fp8,0,0.04056800007820129
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,16,2,128,1,float16,float16,0,0.041310399770736694
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,16,2,128,1,float16,fp8,0,0.041203200817108154
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,16,2,128,1,fp8,fp8,0,0.04120000004768372
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,16,4,128,1,float16,float16,0,0.04365760087966919
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,16,4,128,1,float16,fp8,0,0.04423199892044068
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,16,4,128,1,fp8,fp8,0,0.043700799345970154
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,16,8,128,1,float16,float16,0,0.049830400943756105
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,16,8,128,1,float16,fp8,0,0.04994719922542572
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,16,8,128,1,fp8,fp8,0,0.050040000677108766
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,16,1,128,1,float16,float16,0,1.4468640327453612
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,16,1,128,1,float16,fp8,0,1.452121639251709
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,16,1,128,1,fp8,fp8,0,1.4557519912719727
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,16,2,128,1,float16,float16,0,1.597555160522461
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,16,2,128,1,float16,fp8,0,1.587320041656494
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,16,2,128,1,fp8,fp8,0,1.5892271995544434
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,16,4,128,1,float16,float16,0,1.9012592315673829
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,16,4,128,1,float16,fp8,0,1.878468894958496
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,16,4,128,1,fp8,fp8,0,1.9040943145751954
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,16,8,128,1,float16,float16,0,2.4925312042236327
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,16,1,128,1,float16,float16,0,0.7415103912353516
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,16,16,128,1,float16,float16,0,1.8531375885009767
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,16,8,128,1,float16,fp8,0,2.454572868347168
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,16,16,128,1,float16,fp8,0,1.8458480834960938
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,16,8,128,1,fp8,fp8,0,2.4526479721069334
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,16,16,128,1,fp8,fp8,0,1.8287216186523438
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,16,1,128,1,float16,fp8,0,0.7378799915313721
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,16,2,128,1,float16,float16,0,0.8082143783569335
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,16,1,128,1,fp8,fp8,0,0.7411375999450683
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,16,2,128,1,float16,fp8,0,0.8075759887695313
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,16,2,128,1,fp8,fp8,0,0.8174863815307617
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,16,4,128,1,float16,float16,0,0.9587856292724609
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,16,4,128,1,float16,fp8,0,0.9518464088439942
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,16,4,128,1,fp8,fp8,0,0.9484416007995605
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,16,8,128,1,float16,float16,0,1.255897617340088
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,16,8,128,1,float16,fp8,0,1.2448880195617675
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,16,1,128,1,float16,float16,0,0.3838399887084961
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,16,16,128,1,float16,float16,0,0.9475888252258301
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,16,8,128,1,fp8,fp8,0,1.2412927627563477
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,16,16,128,1,float16,fp8,0,0.9244015693664551
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,16,1,128,1,float16,fp8,0,0.38008480072021483
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,16,16,128,1,fp8,fp8,0,0.9271856307983398
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,16,1,128,1,fp8,fp8,0,0.383622407913208
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,16,2,128,1,float16,float16,0,0.4204832077026367
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,16,2,128,1,float16,fp8,0,0.420963191986084
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,16,2,128,1,fp8,fp8,0,0.4173583984375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,16,4,128,1,float16,float16,0,0.49372000694274903
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,16,4,128,1,float16,fp8,0,0.48700799942016604
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,16,4,128,1,fp8,fp8,0,0.48514719009399415
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,16,8,128,1,float16,float16,0,0.6447679996490479
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,16,8,128,1,float16,fp8,0,0.6348271846771241
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,16,8,128,1,fp8,fp8,0,0.631059217453003
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,16,16,128,1,float16,float16,0,0.48648481369018554
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,16,16,128,1,float16,fp8,0,0.4748335838317871
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,16,1,128,1,float16,float16,0,0.2040208101272583
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,16,16,128,1,fp8,fp8,0,0.4743663787841797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,16,1,128,1,float16,fp8,0,0.2022495985031128
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,16,1,128,1,fp8,fp8,0,0.20417120456695556
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,16,2,128,1,float16,float16,0,0.22524960041046144
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,16,2,128,1,float16,fp8,0,0.22083680629730223
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,16,2,128,1,fp8,fp8,0,0.22269599437713622
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,16,4,128,1,float16,float16,0,0.25954880714416506
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,16,4,128,1,float16,fp8,0,0.25604801177978515
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,16,4,128,1,fp8,fp8,0,0.2558784008026123
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,16,8,128,1,float16,float16,0,0.332476806640625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,16,8,128,1,float16,fp8,0,0.32769439220428465
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,16,8,128,1,fp8,fp8,0,0.3268064022064209
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,16,16,128,1,float16,float16,0,0.2556368112564087
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,16,16,128,1,float16,fp8,0,0.24851040840148925
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,16,16,128,1,fp8,fp8,0,0.2484447956085205
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,16,1,128,1,float16,float16,0,0.11521919965744018
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,16,1,128,1,float16,fp8,0,0.1140720009803772
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,16,1,128,1,fp8,fp8,0,0.11420960426330566
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,16,2,128,1,float16,float16,0,0.12313280105590821
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,16,2,128,1,float16,fp8,0,0.122433602809906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,16,2,128,1,fp8,fp8,0,0.12303520441055298
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,16,4,128,1,float16,float16,0,0.1424623966217041
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,16,4,128,1,float16,fp8,0,0.1400015950202942
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,16,4,128,1,fp8,fp8,0,0.14075679779052735
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,16,8,128,1,float16,float16,0,0.17940800189971923
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,16,8,128,1,float16,fp8,0,0.17549760341644288
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,16,8,128,1,fp8,fp8,0,0.17636159658432007
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,16,16,128,1,float16,float16,0,0.13774880170822143
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,16,16,128,1,float16,fp8,0,0.13447680473327636
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,16,16,128,1,fp8,fp8,0,0.1350607991218567
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,16,1,128,1,float16,float16,0,0.06265919804573059
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,16,1,128,1,float16,fp8,0,0.06183680295944214
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,16,1,128,1,fp8,fp8,0,0.06179839968681335
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,16,2,128,1,float16,float16,0,0.0696560025215149
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,16,2,128,1,float16,fp8,0,0.06786720156669616
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,16,2,128,1,fp8,fp8,0,0.06674720048904419
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,16,4,128,1,float16,float16,0,0.08095520138740539
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,16,4,128,1,float16,fp8,0,0.07538400292396545
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,16,4,128,1,fp8,fp8,0,0.07463520169258117
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,16,8,128,1,float16,float16,0,0.10110399723052979
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,16,8,128,1,float16,fp8,0,0.09605600237846375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,16,8,128,1,fp8,fp8,0,0.09468160271644592
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,16,16,128,1,float16,float16,0,0.07796319723129272
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,16,16,128,1,float16,fp8,0,0.06963679790496827
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,16,16,128,1,fp8,fp8,0,0.06898559927940369
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,16,4,128,1,float16,float16,0,0.04513440132141113
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,16,1,128,1,float16,float16,0,0.03914720118045807
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,16,1,128,1,float16,fp8,0,0.04060960114002228
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,16,1,128,1,fp8,fp8,0,0.04045760035514832
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,16,2,128,1,float16,float16,0,0.03980799913406372
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,16,2,128,1,float16,fp8,0,0.04153760075569153
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,16,2,128,1,fp8,fp8,0,0.04137600064277649
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,16,4,128,1,float16,fp8,0,0.04577600061893463
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,16,4,128,1,fp8,fp8,0,0.04557439982891083
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,16,8,128,1,float16,float16,0,0.05357599854469299
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,16,8,128,1,float16,fp8,0,0.053320002555847165
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,16,8,128,1,fp8,fp8,0,0.053915202617645264
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,16,16,128,1,float16,float16,0,0.047142401337623596
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,16,16,128,1,float16,fp8,0,0.04753440022468567
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,16,16,128,1,fp8,fp8,0,0.047835201025009155
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,16,1,128,1,float16,float16,0,0.03372640013694763
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,16,1,128,1,float16,fp8,0,0.034376001358032225
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,16,1,128,1,fp8,fp8,0,0.034748798608779906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,16,2,128,1,float16,float16,0,0.034318399429321286
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,16,2,128,1,float16,fp8,0,0.034835198521614076
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,16,2,128,1,fp8,fp8,0,0.03443520069122315
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,16,4,128,1,float16,float16,0,0.03493280112743378
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,16,4,128,1,float16,fp8,0,0.03568480014801025
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,16,4,128,1,fp8,fp8,0,0.035580798983573914
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,16,8,128,1,float16,float16,0,0.039860799908638
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,16,8,128,1,float16,fp8,0,0.04126240015029907
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,16,8,128,1,fp8,fp8,0,0.040145599842071535
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,16,1,128,1,float16,float16,0,1.058670425415039
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,16,1,128,1,float16,fp8,0,1.0887311935424804
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,16,1,128,1,fp8,fp8,0,1.0840784072875977
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,16,2,128,1,float16,float16,0,1.208622360229492
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,16,2,128,1,float16,fp8,0,1.2311311721801759
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,16,2,128,1,fp8,fp8,0,1.2268128395080566
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,16,4,128,1,float16,float16,0,1.5098383903503418
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,16,4,128,1,float16,fp8,0,1.5048447608947755
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,16,4,128,1,fp8,fp8,0,1.513479995727539
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,16,8,128,1,float16,float16,0,2.1041168212890624
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,16,1,128,1,float16,float16,0,0.5423679828643799
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,16,8,128,1,float16,fp8,0,2.098723220825195
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,16,1,128,1,float16,fp8,0,0.5575888156890869
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,16,16,128,1,float16,float16,0,1.660193634033203
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,16,8,128,1,fp8,fp8,0,2.105166435241699
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,16,1,128,1,fp8,fp8,0,0.5567471981048584
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,16,16,128,1,float16,fp8,0,1.6586383819580077
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,16,2,128,1,fp8,fp8,0,0.627622413635254
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,16,16,128,1,fp8,fp8,0,1.6553775787353515
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,16,2,128,1,float16,float16,0,0.6165008068084716
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,16,2,128,1,float16,fp8,0,0.6247456073760986
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,16,4,128,1,float16,float16,0,0.7690815925598145
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,16,4,128,1,float16,fp8,0,0.7683119773864746
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,16,4,128,1,fp8,fp8,0,0.7654848098754883
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,16,8,128,1,float16,float16,0,1.0667807579040527
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,16,8,128,1,float16,fp8,0,1.0647647857666016
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,16,16,128,1,float16,fp8,0,0.8389151573181153
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,16,16,128,1,float16,float16,0,0.8404815673828125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,16,8,128,1,fp8,fp8,0,1.0609472274780274
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,16,1,128,1,float16,float16,0,0.28698880672454835
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,16,1,128,1,float16,fp8,0,0.29118239879608154
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,16,16,128,1,fp8,fp8,0,0.8403072357177734
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,16,1,128,1,fp8,fp8,0,0.2926959991455078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,16,2,128,1,float16,float16,0,0.32314720153808596
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,16,2,128,1,float16,fp8,0,0.3245984077453613
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,16,2,128,1,fp8,fp8,0,0.3246256113052368
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,16,4,128,1,float16,float16,0,0.39642560482025146
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,16,4,128,1,float16,fp8,0,0.39541919231414796
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,16,8,128,1,fp8,fp8,0,0.5441343784332275
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,16,4,128,1,fp8,fp8,0,0.39598400592803956
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,16,1,128,1,float16,float16,0,0.15478399991989136
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,16,8,128,1,float16,float16,0,0.5467919826507568
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,16,8,128,1,float16,fp8,0,0.5436800003051758
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,16,16,128,1,float16,float16,0,0.4342959880828857
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,16,16,128,1,float16,fp8,0,0.4310351848602295
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,16,16,128,1,fp8,fp8,0,0.4311744213104248
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,16,1,128,1,float16,fp8,0,0.15819519758224487
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,16,1,128,1,fp8,fp8,0,0.15831520557403564
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,16,2,128,1,float16,float16,0,0.17446719408035277
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,16,2,128,1,float16,fp8,0,0.17613120079040528
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,16,2,128,1,fp8,fp8,0,0.176145601272583
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,16,4,128,1,float16,float16,0,0.20965919494628907
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,16,4,128,1,float16,fp8,0,0.2090143918991089
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,16,4,128,1,fp8,fp8,0,0.2092672109603882
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,16,8,128,1,float16,float16,0,0.2848880052566528
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,16,8,128,1,float16,fp8,0,0.28320159912109377
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,16,8,128,1,fp8,fp8,0,0.2835423946380615
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,16,16,128,1,float16,float16,0,0.22876639366149903
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,16,16,128,1,float16,fp8,0,0.2273087978363037
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,16,16,128,1,fp8,fp8,0,0.22617120742797853
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,16,1,128,1,float16,float16,0,0.08989599943161011
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,16,4,128,1,float16,float16,0,0.1169935941696167
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,16,1,128,1,float16,fp8,0,0.09219200015068055
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,16,1,128,1,fp8,fp8,0,0.09244160056114196
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,16,2,128,1,float16,float16,0,0.09878559708595276
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,16,2,128,1,float16,fp8,0,0.100108802318573
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,16,2,128,1,fp8,fp8,0,0.10026079416275024
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,16,4,128,1,float16,fp8,0,0.11712479591369629
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,16,4,128,1,fp8,fp8,0,0.11688799858093261
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,16,8,128,1,float16,float16,0,0.15418879985809325
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,16,8,128,1,float16,fp8,0,0.15380799770355225
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,16,8,128,1,fp8,fp8,0,0.1537775993347168
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,16,16,128,1,float16,float16,0,0.1255519986152649
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,16,16,128,1,float16,fp8,0,0.12588000297546387
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,16,16,128,1,fp8,fp8,0,0.12584320306777955
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,16,1,128,1,float16,float16,0,0.05245440006256104
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,16,1,128,1,float16,fp8,0,0.05113599896430969
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,16,1,128,1,fp8,fp8,0,0.05154399871826172
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,16,2,128,1,float16,float16,0,0.05865439772605896
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,16,2,128,1,float16,fp8,0,0.05730239748954773
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,16,2,128,1,fp8,fp8,0,0.05668479800224304
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,16,4,128,1,float16,float16,0,0.06993439793586731
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,16,4,128,1,float16,fp8,0,0.06430720090866089
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,16,4,128,1,fp8,fp8,0,0.06424000263214111
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,16,8,128,1,float16,float16,0,0.08845599889755248
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,16,8,128,1,float16,fp8,0,0.08486400246620178
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,16,8,128,1,fp8,fp8,0,0.0844543993473053
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,16,16,128,1,float16,float16,0,0.07290560007095337
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,16,16,128,1,float16,fp8,0,0.06786400079727173
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,16,16,128,1,fp8,fp8,0,0.06684319972991944
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,16,1,128,1,float16,float16,0,0.03541919887065888
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,16,1,128,1,float16,fp8,0,0.03652159869670868
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,16,1,128,1,fp8,fp8,0,0.0365664005279541
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,16,2,128,1,float16,float16,0,0.03618400096893311
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,16,2,128,1,float16,fp8,0,0.037441599369049075
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,16,2,128,1,fp8,fp8,0,0.03750079870223999
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,16,4,128,1,float16,float16,0,0.04085280001163483
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,16,4,128,1,float16,fp8,0,0.04200960099697113
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,16,4,128,1,fp8,fp8,0,0.04177919924259186
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,16,8,128,1,float16,float16,0,0.049595201015472413
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,16,8,128,1,float16,fp8,0,0.04923200011253357
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,16,8,128,1,fp8,fp8,0,0.049158400297164916
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,16,16,128,1,float16,float16,0,0.04075360000133514
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,16,16,128,1,float16,fp8,0,0.0422111988067627
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,16,16,128,1,fp8,fp8,0,0.04254559874534607
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,16,1,128,1,float16,float16,0,0.027403199672698976
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,16,1,128,1,float16,fp8,0,0.028676798939704894
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,16,1,128,1,fp8,fp8,0,0.028808000683784484
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,16,2,128,1,float16,float16,0,0.027955201268196107
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,16,2,128,1,float16,fp8,0,0.029319998621940613
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,16,2,128,1,fp8,fp8,0,0.029425600171089174
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,16,4,128,1,float16,float16,0,0.0290336012840271
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,16,4,128,1,float16,fp8,0,0.030236798524856567
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,16,4,128,1,fp8,fp8,0,0.030155199766159057
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,16,8,128,1,float16,float16,0,0.033390399813652036
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,16,16,128,1,fp8,fp8,0,0.0348143994808197
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,16,8,128,1,float16,fp8,0,0.03503200113773346
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,16,8,128,1,fp8,fp8,0,0.03497759997844696
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,16,16,128,1,float16,float16,0,0.03280960023403168
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,16,16,128,1,float16,fp8,0,0.03476159870624542
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,16,1,128,1,float16,float16,0,0.027265599370002745
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,16,1,128,1,float16,fp8,0,0.02874560058116913
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,16,1,128,1,fp8,fp8,0,0.028611201047897338
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,16,2,128,1,float16,fp8,0,0.028790399432182312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,16,2,128,1,float16,float16,0,0.02730720043182373
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,16,2,128,1,fp8,fp8,0,0.028940799832344054
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,16,4,128,1,float16,float16,0,0.02759360074996948
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,16,4,128,1,float16,fp8,0,0.029049599170684816
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,16,4,128,1,fp8,fp8,0,0.028939199447631837
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,16,8,128,1,float16,float16,0,0.02858240008354187
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,16,8,128,1,float16,fp8,0,0.02964639961719513
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,16,8,128,1,fp8,fp8,0,0.030374398827552794
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,16,1,128,1,float16,float16,0,0.4526031970977783
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,16,1,128,1,float16,fp8,0,0.47099838256835935
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,16,1,128,1,fp8,fp8,0,0.47265281677246096
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,16,2,128,1,float16,float16,0,0.5264624118804931
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,16,2,128,1,float16,fp8,0,0.5403024196624756
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,16,2,128,1,fp8,fp8,0,0.540334415435791
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,16,4,128,1,float16,float16,0,0.674283218383789
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,16,4,128,1,float16,fp8,0,0.6830927848815918
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,16,4,128,1,fp8,fp8,0,0.6825744152069092
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,16,8,128,1,float16,float16,0,0.9742848396301269
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,16,8,128,1,float16,fp8,0,0.9805232048034668
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,16,8,128,1,fp8,fp8,0,0.978923225402832
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,16,16,128,1,float16,float16,0,0.7970960140228271
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,16,16,128,1,float16,fp8,0,0.7827871799468994
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,16,1,128,1,float16,float16,0,0.23922080993652345
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,16,16,128,1,fp8,fp8,0,0.7830416202545166
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,16,1,128,1,float16,fp8,0,0.24978880882263182
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,16,1,128,1,fp8,fp8,0,0.24822399616241456
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,16,2,128,1,float16,float16,0,0.27661919593811035
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,16,2,128,1,float16,fp8,0,0.28368799686431884
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,16,2,128,1,fp8,fp8,0,0.28308959007263185
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,16,4,128,1,float16,float16,0,0.3500511884689331
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,16,4,128,1,float16,fp8,0,0.35306239128112793
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,16,4,128,1,fp8,fp8,0,0.3520240068435669
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,16,8,128,1,float16,float16,0,0.5009727954864502
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,16,8,128,1,float16,fp8,0,0.5006192207336426
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,16,8,128,1,fp8,fp8,0,0.5014111995697021
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,16,16,128,1,float16,float16,0,0.41255841255187986
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,16,16,128,1,float16,fp8,0,0.40323519706726074
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,16,1,128,1,float16,float16,0,0.1318719983100891
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,16,16,128,1,fp8,fp8,0,0.4027872085571289
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,16,1,128,1,float16,fp8,0,0.13678560256958008
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,16,1,128,1,fp8,fp8,0,0.13727680444717408
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,16,2,128,1,float16,float16,0,0.15100159645080566
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,16,2,128,1,float16,fp8,0,0.1550336003303528
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,16,8,128,1,float16,float16,0,0.261411190032959
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,16,2,128,1,fp8,fp8,0,0.15450719594955445
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,16,4,128,1,float16,float16,0,0.1863312005996704
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,16,4,128,1,float16,fp8,0,0.18854880332946777
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,16,4,128,1,fp8,fp8,0,0.18809280395507813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,16,8,128,1,float16,fp8,0,0.26165759563446045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,16,8,128,1,fp8,fp8,0,0.26225919723510743
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,16,16,128,1,float16,fp8,0,0.21318719387054444
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,16,16,128,1,float16,float16,0,0.21831040382385253
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,16,16,128,1,fp8,fp8,0,0.21327519416809082
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,16,1,128,1,float16,float16,0,0.07833600044250488
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,16,1,128,1,float16,fp8,0,0.08148000240325928
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,16,1,128,1,fp8,fp8,0,0.08139039874076844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,16,2,128,1,float16,float16,0,0.08713120222091675
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,16,2,128,1,float16,fp8,0,0.08888159990310669
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,16,2,128,1,fp8,fp8,0,0.08876320123672485
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,16,4,128,1,float16,float16,0,0.10548160076141358
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,16,4,128,1,float16,fp8,0,0.10664000511169433
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,16,4,128,1,fp8,fp8,0,0.10624799728393555
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,16,8,128,1,float16,float16,0,0.1431488037109375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,16,8,128,1,float16,fp8,0,0.14313280582427979
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,16,8,128,1,fp8,fp8,0,0.14379040002822877
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,16,16,128,1,float16,float16,0,0.12002400159835816
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,16,16,128,1,float16,fp8,0,0.11767840385437012
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,16,16,128,1,fp8,fp8,0,0.11801279783248901
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,16,1,128,1,float16,float16,0,0.045542401075363156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,16,1,128,1,float16,fp8,0,0.046107199788093564
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,16,1,128,1,fp8,fp8,0,0.04623199999332428
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,16,2,128,1,float16,float16,0,0.052121597528457644
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,16,2,128,1,float16,fp8,0,0.05100799798965454
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,16,2,128,1,fp8,fp8,0,0.051502400636672975
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,16,4,128,1,float16,float16,0,0.06325759887695312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,16,4,128,1,float16,fp8,0,0.05835679769515991
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,16,4,128,1,fp8,fp8,0,0.058627200126647946
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,16,8,128,1,float16,float16,0,0.08277279734611512
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,16,8,128,1,float16,fp8,0,0.07954720258712769
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,16,8,128,1,fp8,fp8,0,0.08008959889411926
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,16,16,128,1,float16,float16,0,0.06841279864311219
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,16,16,128,1,float16,fp8,0,0.06128479838371277
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,16,16,128,1,fp8,fp8,0,0.06182240247726441
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,16,1,128,1,float16,float16,0,0.03099200129508972
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,16,1,128,1,float16,fp8,0,0.03292959928512573
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,16,1,128,1,fp8,fp8,0,0.032996800541877744
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,16,2,128,1,float16,float16,0,0.0321152001619339
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,16,2,128,1,float16,fp8,0,0.034006398916244504
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,16,2,128,1,fp8,fp8,0,0.03379679918289184
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,16,4,128,1,float16,float16,0,0.036483201384544375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,16,4,128,1,float16,fp8,0,0.03810240030288696
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,16,4,128,1,fp8,fp8,0,0.03819360136985779
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,16,8,128,1,float16,float16,0,0.044964799284935
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,16,8,128,1,float16,fp8,0,0.045793598890304564
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,16,8,128,1,fp8,fp8,0,0.04570879936218262
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,16,16,128,1,float16,float16,0,0.03816959857940674
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,16,16,128,1,float16,fp8,0,0.03871360123157501
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,16,16,128,1,fp8,fp8,0,0.03828639984130859
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,16,1,128,1,float16,float16,0,0.02454719990491867
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,16,1,128,1,float16,fp8,0,0.02569119930267334
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,16,1,128,1,fp8,fp8,0,0.025481599569320678
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,16,2,128,1,float16,float16,0,0.02487040013074875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,16,2,128,1,float16,fp8,0,0.025736001133918763
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,16,2,128,1,fp8,fp8,0,0.025916799902915955
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,16,4,128,1,float16,float16,0,0.025872001051902772
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,16,4,128,1,float16,fp8,0,0.026447999477386474
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,16,4,128,1,fp8,fp8,0,0.026654401421546937
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,16,8,128,1,float16,float16,0,0.030486398935317995
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,16,8,128,1,float16,fp8,0,0.03125439882278443
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,16,8,128,1,fp8,fp8,0,0.03105599880218506
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,16,16,128,1,float16,float16,0,0.03006559908390045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,16,16,128,1,float16,fp8,0,0.0311055988073349
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,16,16,128,1,fp8,fp8,0,0.031044799089431762
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,16,1,128,1,float16,float16,0,0.02404160052537918
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,16,1,128,1,float16,fp8,0,0.025148800015449523
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,16,1,128,1,fp8,fp8,0,0.025171199440956117
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,16,2,128,1,float16,float16,0,0.024432000517845155
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,16,2,128,1,float16,fp8,0,0.025417599081993102
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,16,2,128,1,fp8,fp8,0,0.024987199902534486
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,16,4,128,1,float16,float16,0,0.024771200120449068
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,16,4,128,1,float16,fp8,0,0.025481599569320678
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,16,4,128,1,fp8,fp8,0,0.025283199548721314
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,16,8,128,1,float16,float16,0,0.02545439898967743
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,16,8,128,1,float16,fp8,0,0.02632000148296356
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,16,8,128,1,fp8,fp8,0,0.026743999123573302
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,16,16,128,1,float16,float16,0,0.02414399981498718
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,16,16,128,1,float16,fp8,0,0.025241601467132568
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,16,16,128,1,fp8,fp8,0,0.025302401185035704
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,16,1,128,1,float16,float16,0,0.022969600558280946
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,16,1,128,1,float16,fp8,0,0.0234592005610466
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,16,1,128,1,fp8,fp8,0,0.02378080040216446
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,16,2,128,1,float16,float16,0,0.022852799296379088
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,16,2,128,1,float16,fp8,0,0.02393600046634674
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,16,2,128,1,fp8,fp8,0,0.024063999950885772
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,16,4,128,1,float16,float16,0,0.022966399788856506
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,16,4,128,1,float16,fp8,0,0.02420639991760254
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,16,4,128,1,fp8,fp8,0,0.023977600038051605
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,16,8,128,1,float16,float16,0,0.023180800676345825
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,16,8,128,1,float16,fp8,0,0.024377599358558655
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,16,8,128,1,fp8,fp8,0,0.02433760017156601
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,16,1,128,1,float16,float16,0,0.2247312068939209
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,16,1,128,1,float16,fp8,0,0.23850879669189454
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,16,1,128,1,fp8,fp8,0,0.2393199920654297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,16,2,128,1,float16,float16,0,0.261406397819519
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,16,4,128,1,fp8,fp8,0,0.34858078956604005
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,16,2,128,1,float16,fp8,0,0.2775183916091919
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,16,2,128,1,fp8,fp8,0,0.27335200309753416
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,16,4,128,1,float16,float16,0,0.3342576026916504
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,16,8,128,1,fp8,fp8,0,0.4932975769042969
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,16,4,128,1,float16,fp8,0,0.3497535943984985
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,16,8,128,1,float16,float16,0,0.4844560146331787
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,16,8,128,1,float16,fp8,0,0.49463682174682616
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,16,16,128,1,float16,float16,0,0.40604162216186523
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,16,16,128,1,float16,fp8,0,0.40134239196777344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,16,1,128,1,float16,float16,0,0.12286239862442017
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,16,16,128,1,fp8,fp8,0,0.40128159523010254
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,16,1,128,1,float16,fp8,0,0.1313599944114685
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,16,1,128,1,fp8,fp8,0,0.13207999467849732
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,16,2,128,1,float16,float16,0,0.14177759885787963
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,16,2,128,1,float16,fp8,0,0.15037920475006103
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,16,2,128,1,fp8,fp8,0,0.1505295991897583
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,16,4,128,1,float16,float16,0,0.17832479476928711
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,16,4,128,1,float16,fp8,0,0.18524160385131835
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,16,4,128,1,fp8,fp8,0,0.1853135943412781
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,16,8,128,1,float16,float16,0,0.25291359424591064
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,16,8,128,1,float16,fp8,0,0.25709118843078616
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,16,8,128,1,fp8,fp8,0,0.257039999961853
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,16,16,128,1,float16,float16,0,0.2172368049621582
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,16,16,128,1,float16,fp8,0,0.21058080196380616
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,16,16,128,1,fp8,fp8,0,0.21033918857574463
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,16,1,128,1,float16,float16,0,0.07556800246238708
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,16,1,128,1,float16,fp8,0,0.077675199508667
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,16,1,128,1,fp8,fp8,0,0.07798399925231933
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,16,2,128,1,float16,float16,0,0.0838271975517273
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,16,2,128,1,float16,fp8,0,0.08589119911193847
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,16,2,128,1,fp8,fp8,0,0.08513759970664977
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,16,4,128,1,float16,float16,0,0.10219199657440185
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,16,4,128,1,float16,fp8,0,0.10333440303802491
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,16,4,128,1,fp8,fp8,0,0.10353920459747315
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,16,8,128,1,float16,float16,0,0.1393712043762207
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,16,8,128,1,float16,fp8,0,0.13819199800491333
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,16,8,128,1,fp8,fp8,0,0.1386623978614807
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,16,16,128,1,float16,float16,0,0.11736799478530884
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,16,1,128,1,fp8,fp8,0,0.041198399662971494
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,16,16,128,1,float16,fp8,0,0.11323519945144653
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,16,16,128,1,fp8,fp8,0,0.11280959844589233
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,16,1,128,1,float16,float16,0,0.040428799390792844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,16,1,128,1,float16,fp8,0,0.041912001371383664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,16,2,128,1,float16,float16,0,0.04760479927062988
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,16,2,128,1,float16,fp8,0,0.04622560143470764
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,16,2,128,1,fp8,fp8,0,0.04673439860343933
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,16,4,128,1,float16,float16,0,0.06029760241508484
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,16,4,128,1,float16,fp8,0,0.054150402545928955
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,16,4,128,1,fp8,fp8,0,0.054764801263809205
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,16,8,128,1,float16,float16,0,0.080103999376297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,16,8,128,1,float16,fp8,0,0.07400799989700317
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,16,8,128,1,fp8,fp8,0,0.07392320036888123
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,16,16,128,1,float16,float16,0,0.066348797082901
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,16,16,128,1,float16,fp8,0,0.057657599449157715
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,16,16,128,1,fp8,fp8,0,0.057494401931762695
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,16,1,128,1,float16,float16,0,0.027860799431800844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,16,1,128,1,float16,fp8,0,0.02924000024795532
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,16,1,128,1,fp8,fp8,0,0.02914080023765564
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,16,2,128,1,float16,float16,0,0.02895680069923401
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,16,2,128,1,float16,fp8,0,0.030118399858474733
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,16,2,128,1,fp8,fp8,0,0.030321601033210754
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,16,4,128,1,float16,float16,0,0.033687999844551085
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,16,4,128,1,float16,fp8,0,0.03451519906520843
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,16,4,128,1,fp8,fp8,0,0.03443840146064758
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,16,8,128,1,float16,float16,0,0.042105600237846375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,16,8,128,1,float16,fp8,0,0.04174720048904419
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,16,8,128,1,fp8,fp8,0,0.04163039922714233
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,16,16,128,1,float16,float16,0,0.03638879954814911
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,16,16,128,1,float16,fp8,0,0.036734399199485776
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,16,16,128,1,fp8,fp8,0,0.036671999096870425
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,16,1,128,1,float16,float16,0,0.023012800514698027
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,16,1,128,1,float16,fp8,0,0.02364159971475601
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,16,1,128,1,fp8,fp8,0,0.024051199853420257
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,16,2,128,1,float16,float16,0,0.023470400273799895
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,16,2,128,1,float16,fp8,0,0.024089600145816802
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,16,2,128,1,fp8,fp8,0,0.024209600687026978
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,16,4,128,1,float16,float16,0,0.024265600740909575
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,16,4,128,1,float16,fp8,0,0.02512640058994293
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,16,4,128,1,fp8,fp8,0,0.02513279914855957
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,16,8,128,1,float16,float16,0,0.028758400678634645
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,16,8,128,1,float16,fp8,0,0.02966879904270172
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,16,8,128,1,fp8,fp8,0,0.029548799991607665
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,16,16,128,1,float16,float16,0,0.02836799919605255
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,16,16,128,1,float16,fp8,0,0.02889760136604309
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,16,16,128,1,fp8,fp8,0,0.029054400324821473
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,16,1,128,1,float16,float16,0,0.02268799990415573
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,16,1,128,1,float16,fp8,0,0.023193599283695222
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,16,1,128,1,fp8,fp8,0,0.023416000604629516
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,16,2,128,1,float16,float16,0,0.022635200619697572
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,16,2,128,1,float16,fp8,0,0.023740799725055696
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,16,2,128,1,fp8,fp8,0,0.02327679991722107
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,16,4,128,1,float16,float16,0,0.02292319983243942
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,16,4,128,1,float16,fp8,0,0.023712000250816344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,16,4,128,1,fp8,fp8,0,0.023659199476242065
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,16,8,128,1,float16,float16,0,0.02367520034313202
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,16,8,128,1,float16,fp8,0,0.024259200692176817
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,16,8,128,1,fp8,fp8,0,0.02457599937915802
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,16,2,128,1,float16,float16,0,0.021065600216388702
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,16,16,128,1,float16,float16,0,0.022726400196552275
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,16,16,128,1,float16,fp8,0,0.023318399488925935
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,16,16,128,1,fp8,fp8,0,0.023455999791622162
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,16,1,128,1,float16,float16,0,0.021188800036907197
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,16,1,128,1,float16,fp8,0,0.0220223993062973
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,16,1,128,1,fp8,fp8,0,0.022043199837207796
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,16,2,128,1,float16,fp8,0,0.02213599979877472
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,16,2,128,1,fp8,fp8,0,0.021960000693798064
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,16,4,128,1,float16,float16,0,0.021447999775409697
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,16,4,128,1,float16,fp8,0,0.022334399819374084
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,16,4,128,1,fp8,fp8,0,0.022257600724697114
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,16,8,128,1,float16,float16,0,0.02181120067834854
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,16,8,128,1,float16,fp8,0,0.022473600506782532
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,16,8,128,1,fp8,fp8,0,0.022519999742507936
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,16,16,128,1,float16,float16,0,0.020822399854660036
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,16,16,128,1,float16,fp8,0,0.0211776003241539
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,16,16,128,1,fp8,fp8,0,0.021617600321769716
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,16,1,128,1,float16,float16,0,0.020084799826145174
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,16,1,128,1,float16,fp8,0,0.020838400721549986
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,16,1,128,1,fp8,fp8,0,0.02083680033683777
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,16,2,128,1,float16,float16,0,0.020190399885177613
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,16,2,128,1,float16,fp8,0,0.020948800444602966
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,16,2,128,1,fp8,fp8,0,0.021031999588012697
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,16,4,128,1,float16,float16,0,0.02029760032892227
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,16,4,128,1,float16,fp8,0,0.021163199841976166
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,16,4,128,1,fp8,fp8,0,0.02114560008049011
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,16,8,128,1,float16,float16,0,0.020121599733829498
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,16,8,128,1,float16,fp8,0,0.021209600567817687
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,16,8,128,1,fp8,fp8,0,0.021385599672794343
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,16,1,128,1,float16,float16,0,0.12339839935302735
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,16,1,128,1,float16,fp8,0,0.13230400085449218
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,16,1,128,1,fp8,fp8,0,0.13324480056762694
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,16,2,128,1,float16,float16,0,0.1420464038848877
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,16,2,128,1,float16,fp8,0,0.15072640180587768
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,16,2,128,1,fp8,fp8,0,0.15031039714813232
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,16,4,128,1,float16,float16,0,0.17830079793930054
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,16,8,128,1,float16,fp8,0,0.25722560882568357
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,16,4,128,1,float16,fp8,0,0.18590400218963624
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,16,4,128,1,fp8,fp8,0,0.18583519458770753
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,16,8,128,1,float16,float16,0,0.25635039806365967
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,16,8,128,1,fp8,fp8,0,0.25679199695587157
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,16,16,128,1,float16,float16,0,0.24287519454956055
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,16,16,128,1,float16,fp8,0,0.24605920314788818
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,16,16,128,1,fp8,fp8,0,0.2453536033630371
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,16,1,128,1,float16,float16,0,0.07655680179595947
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,16,1,128,1,float16,fp8,0,0.07857279777526856
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,16,1,128,1,fp8,fp8,0,0.07854560017585754
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,16,2,128,1,float16,float16,0,0.08368800282478332
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,16,2,128,1,float16,fp8,0,0.08595839738845826
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,16,2,128,1,fp8,fp8,0,0.08647840023040772
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,16,4,128,1,float16,float16,0,0.1030351996421814
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,16,4,128,1,float16,fp8,0,0.10385440587997437
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,16,4,128,1,fp8,fp8,0,0.10377919673919678
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,16,8,128,1,float16,float16,0,0.1394976019859314
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,16,8,128,1,float16,fp8,0,0.13891680240631105
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,16,8,128,1,fp8,fp8,0,0.13888479471206666
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,16,16,128,1,float16,float16,0,0.1326416015625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,16,16,128,1,float16,fp8,0,0.13177599906921386
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,16,16,128,1,fp8,fp8,0,0.1320032000541687
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,16,1,128,1,float16,float16,0,0.04074879884719849
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,16,1,128,1,float16,fp8,0,0.0416047990322113
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,16,1,128,1,fp8,fp8,0,0.04144479930400848
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,16,2,128,1,float16,fp8,0,0.045771199464797976
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,16,2,128,1,float16,float16,0,0.04869279861450195
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,16,2,128,1,fp8,fp8,0,0.04652799963951111
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,16,4,128,1,float16,float16,0,0.0607088029384613
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,16,4,128,1,float16,fp8,0,0.05398240089416504
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,16,4,128,1,fp8,fp8,0,0.053793597221374514
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,16,8,128,1,float16,fp8,0,0.07536640167236328
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,16,8,128,1,float16,float16,0,0.08063200116157532
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,16,8,128,1,fp8,fp8,0,0.07461760044097901
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,16,16,128,1,float16,float16,0,0.07410719990730286
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,16,16,128,1,float16,fp8,0,0.06696320176124573
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,16,16,128,1,fp8,fp8,0,0.06735680103302003
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,16,1,128,1,float16,float16,0,0.02821120023727417
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,16,1,128,1,float16,fp8,0,0.029462400078773498
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,16,1,128,1,fp8,fp8,0,0.02959040105342865
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,16,2,128,1,float16,float16,0,0.029209598898887634
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,16,2,128,1,float16,fp8,0,0.03041119873523712
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,16,2,128,1,fp8,fp8,0,0.030638399720191955
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,16,4,128,1,float16,float16,0,0.03422879874706268
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,16,4,128,1,float16,fp8,0,0.03469919860363006
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,16,4,128,1,fp8,fp8,0,0.03511840105056763
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,16,8,128,1,float16,float16,0,0.04374400079250336
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,16,8,128,1,float16,fp8,0,0.04193919897079468
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,16,8,128,1,fp8,fp8,0,0.04218080043792725
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,16,16,128,1,float16,float16,0,0.0397487998008728
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,16,16,128,1,float16,fp8,0,0.04087679982185364
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,16,16,128,1,fp8,fp8,0,0.04094719886779785
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,16,1,128,1,float16,float16,0,0.023089599609375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,16,1,128,1,float16,fp8,0,0.02391680032014847
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,16,1,128,1,fp8,fp8,0,0.02364159971475601
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,16,2,128,1,float16,float16,0,0.023337599635124207
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,16,2,128,1,float16,fp8,0,0.024212799966335297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,16,2,128,1,fp8,fp8,0,0.024004800617694853
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,16,16,128,1,float16,float16,0,0.027796798944473268
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,16,4,128,1,float16,float16,0,0.024268800020217897
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,16,4,128,1,float16,fp8,0,0.02504960000514984
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,16,4,128,1,fp8,fp8,0,0.025336000323295593
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,16,8,128,1,float16,float16,0,0.0288783997297287
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,16,8,128,1,float16,fp8,0,0.029083201289176942
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,16,8,128,1,fp8,fp8,0,0.02945440113544464
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,16,16,128,1,float16,fp8,0,0.02890399992465973
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,16,16,128,1,fp8,fp8,0,0.028681600093841554
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,16,1,128,1,float16,float16,0,0.02259040027856827
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,16,1,128,1,float16,fp8,0,0.023112000524997713
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,16,1,128,1,fp8,fp8,0,0.023545600473880768
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,16,2,128,1,float16,float16,0,0.022679999470710754
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,16,2,128,1,float16,fp8,0,0.023366400599479677
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,16,2,128,1,fp8,fp8,0,0.02359839975833893
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,16,4,128,1,float16,float16,0,0.022753599286079406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,16,4,128,1,float16,fp8,0,0.023497599363327026
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,16,4,128,1,fp8,fp8,0,0.02359199970960617
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,16,8,128,1,float16,float16,0,0.02373439967632294
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,16,8,128,1,float16,fp8,0,0.02460319995880127
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,16,8,128,1,fp8,fp8,0,0.02447039932012558
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,16,16,128,1,float16,float16,0,0.02228800058364868
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,16,16,128,1,float16,fp8,0,0.02312159985303879
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,16,16,128,1,fp8,fp8,0,0.023107199370861052
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,16,1,128,1,float16,float16,0,0.021116800606250763
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,16,1,128,1,float16,fp8,0,0.021793599426746368
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,16,1,128,1,fp8,fp8,0,0.022086399793624877
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,16,2,128,1,float16,float16,0,0.021542400121688843
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,16,2,128,1,float16,fp8,0,0.022116799652576447
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,16,2,128,1,fp8,fp8,0,0.022206400334835053
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,16,4,128,1,float16,float16,0,0.021347199380397797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,16,4,128,1,float16,fp8,0,0.022140799462795256
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,16,4,128,1,fp8,fp8,0,0.022268800437450408
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,16,8,128,1,float16,float16,0,0.02168000042438507
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,16,8,128,1,float16,fp8,0,0.022259199619293214
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,16,8,128,1,fp8,fp8,0,0.022878399491310118
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,16,16,128,1,float16,float16,0,0.020483200252056123
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,16,16,128,1,float16,fp8,0,0.02123199999332428
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,16,16,128,1,fp8,fp8,0,0.021425600349903106
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,16,1,128,1,float16,float16,0,0.02003040015697479
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,16,1,128,1,float16,fp8,0,0.021076799929142
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,16,1,128,1,fp8,fp8,0,0.021110400557518005
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,16,2,128,1,float16,float16,0,0.020316800475120543
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,16,2,128,1,float16,fp8,0,0.02080000042915344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,16,2,128,1,fp8,fp8,0,0.020908799767494202
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,16,4,128,1,float16,float16,0,0.02025119960308075
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,16,4,128,1,fp8,fp8,0,0.02102559953927994
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,16,4,128,1,float16,fp8,0,0.021014399826526642
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,16,8,128,1,float16,float16,0,0.020577600598335265
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,16,8,128,1,float16,fp8,0,0.02146880030632019
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,16,8,128,1,fp8,fp8,0,0.021028800308704375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,16,16,128,1,float16,float16,0,0.01979999989271164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,16,16,128,1,float16,fp8,0,0.020873600244522096
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,16,16,128,1,fp8,fp8,0,0.020713600516319274
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,16,1,128,1,float16,float16,0,0.019787199795246124
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,16,1,128,1,float16,fp8,0,0.02034880071878433
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,16,1,128,1,fp8,fp8,0,0.02030559927225113
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,16,2,128,1,float16,float16,0,0.019734400510787963
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,16,2,128,1,float16,fp8,0,0.020587199926376344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,16,2,128,1,fp8,fp8,0,0.020329600572586058
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,16,4,128,1,float16,float16,0,0.019735999405384064
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,16,4,128,1,float16,fp8,0,0.020422400534152986
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,16,4,128,1,fp8,fp8,0,0.020633600652217865
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,16,8,128,1,float16,float16,0,0.020099200308322906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,16,8,128,1,float16,fp8,0,0.020735999941825865
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,16,8,128,1,fp8,fp8,0,0.020870399475097657
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,16,1,128,1,float16,float16,0,0.0759440004825592
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,16,1,128,1,float16,fp8,0,0.07934399843215942
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,16,1,128,1,fp8,fp8,0,0.07961120009422303
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,16,2,128,1,float16,float16,0,0.08520640134811401
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,16,2,128,1,float16,fp8,0,0.08726239800453187
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,16,2,128,1,fp8,fp8,0,0.0874239981174469
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,16,4,128,1,float16,float16,0,0.10391839742660522
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,16,4,128,1,float16,fp8,0,0.10544799566268921
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,16,4,128,1,fp8,fp8,0,0.10501760244369507
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,16,8,128,1,float16,float16,0,0.1698240041732788
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,16,8,128,1,float16,fp8,0,0.17532960176467896
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,16,8,128,1,fp8,fp8,0,0.1755743980407715
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,16,16,128,1,float16,float16,0,0.1628175973892212
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,16,16,128,1,float16,fp8,0,0.16976640224456788
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,16,16,128,1,fp8,fp8,0,0.1696560025215149
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,16,1,128,1,float16,float16,0,0.0404448002576828
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,16,1,128,1,float16,fp8,0,0.04224480092525482
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,16,1,128,1,fp8,fp8,0,0.04262239933013916
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,16,2,128,1,float16,float16,0,0.04888480007648468
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,16,2,128,1,float16,fp8,0,0.04720959961414337
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,16,2,128,1,fp8,fp8,0,0.04738239943981171
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,16,4,128,1,float16,float16,0,0.06055840253829956
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,16,4,128,1,float16,fp8,0,0.05620800256729126
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,16,4,128,1,fp8,fp8,0,0.05562880039215088
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,16,8,128,1,float16,float16,0,0.09520800113677978
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,16,8,128,1,float16,fp8,0,0.09393759965896606
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,16,8,128,1,fp8,fp8,0,0.09242879748344421
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,16,16,128,1,float16,float16,0,0.09010239839553832
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,16,16,128,1,float16,fp8,0,0.08500000238418579
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,16,16,128,1,fp8,fp8,0,0.0860144019126892
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,16,1,128,1,float16,float16,0,0.028502398729324342
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,16,1,128,1,float16,fp8,0,0.030131199955940248
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,16,1,128,1,fp8,fp8,0,0.03022879958152771
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,16,2,128,1,float16,float16,0,0.029497599601745604
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,16,2,128,1,float16,fp8,0,0.0311024010181427
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,16,2,128,1,fp8,fp8,0,0.031086400151252747
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,16,4,128,1,float16,float16,0,0.03467839956283569
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,16,4,128,1,float16,fp8,0,0.03517920076847077
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,16,4,128,1,fp8,fp8,0,0.03542560040950775
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,16,8,128,1,float16,float16,0,0.05140479803085327
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,16,8,128,1,float16,fp8,0,0.05130239725112915
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,16,8,128,1,fp8,fp8,0,0.051399999856948854
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,16,16,128,1,float16,float16,0,0.04767360091209412
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,16,16,128,1,float16,fp8,0,0.050273597240448
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,16,16,128,1,fp8,fp8,0,0.05024480223655701
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,16,1,128,1,float16,float16,0,0.023678399622440338
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,16,1,128,1,float16,fp8,0,0.024281600117683412
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,16,1,128,1,fp8,fp8,0,0.02458080053329468
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,16,2,128,1,float16,float16,0,0.023532800376415253
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,16,2,128,1,float16,fp8,0,0.02454880028963089
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,16,2,128,1,fp8,fp8,0,0.024539199471473695
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,16,4,128,1,float16,float16,0,0.024873599410057068
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,16,4,128,1,float16,fp8,0,0.025467199087142945
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,16,4,128,1,fp8,fp8,0,0.02553279995918274
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,16,8,128,1,float16,float16,0,0.032636800408363344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,16,8,128,1,float16,fp8,0,0.03441280126571655
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,16,8,128,1,fp8,fp8,0,0.033799999952316286
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,16,16,128,1,float16,float16,0,0.03184640109539032
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,16,16,128,1,float16,fp8,0,0.033283200860023496
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,16,16,128,1,fp8,fp8,0,0.033211201429367065
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,16,1,128,1,float16,float16,0,0.023020799458026885
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,16,1,128,1,float16,fp8,0,0.023511999845504762
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,16,1,128,1,fp8,fp8,0,0.023347200453281404
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,16,2,128,1,float16,float16,0,0.022548800706863402
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,16,2,128,1,float16,fp8,0,0.023390400409698486
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,16,2,128,1,fp8,fp8,0,0.023291200399398804
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,16,4,128,1,float16,float16,0,0.02260800004005432
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,16,4,128,1,float16,fp8,0,0.023817600309848787
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,16,4,128,1,fp8,fp8,0,0.02386080026626587
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,16,8,128,1,float16,float16,0,0.023476800322532652
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,16,8,128,1,float16,fp8,0,0.024417600035667418
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,16,8,128,1,fp8,fp8,0,0.02454880028963089
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,16,16,128,1,float16,float16,0,0.022777600586414336
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,16,16,128,1,float16,fp8,0,0.023364800214767455
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,16,2,128,1,float16,fp8,0,0.02242400050163269
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,16,16,128,1,fp8,fp8,0,0.023470400273799895
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,16,1,128,1,float16,float16,0,0.021436800062656403
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,16,1,128,1,float16,fp8,0,0.02205280065536499
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,16,1,128,1,fp8,fp8,0,0.0221343994140625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,16,2,128,1,float16,float16,0,0.021264000236988066
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,16,2,128,1,fp8,fp8,0,0.022180800139904023
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,16,4,128,1,float16,float16,0,0.021563200652599333
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,16,4,128,1,float16,fp8,0,0.022116799652576447
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,16,4,128,1,fp8,fp8,0,0.022176000475883483
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,16,8,128,1,float16,float16,0,0.021751999855041504
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,16,8,128,1,float16,fp8,0,0.02228800058364868
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,16,8,128,1,fp8,fp8,0,0.022126400470733644
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,16,16,128,1,float16,float16,0,0.0207056000828743
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,16,16,128,1,float16,fp8,0,0.021641600131988525
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,16,16,128,1,fp8,fp8,0,0.021595199406147004
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,16,1,128,1,float16,float16,0,0.02001280039548874
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,16,1,128,1,float16,fp8,0,0.02096160054206848
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,16,1,128,1,fp8,fp8,0,0.021036800742149354
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,16,2,128,1,float16,float16,0,0.02028159946203232
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,16,2,128,1,float16,fp8,0,0.020640000700950623
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,16,2,128,1,fp8,fp8,0,0.021171200275421142
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,16,4,128,1,float16,float16,0,0.020371200144290925
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,16,4,128,1,float16,fp8,0,0.021086399257183076
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,16,4,128,1,fp8,fp8,0,0.021372799575328828
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,16,8,128,1,float16,float16,0,0.020452800393104553
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,16,8,128,1,float16,fp8,0,0.02122880071401596
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,16,8,128,1,fp8,fp8,0,0.02159679979085922
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,16,16,128,1,float16,float16,0,0.020017600059509276
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,16,16,128,1,float16,fp8,0,0.021086399257183076
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,16,16,128,1,fp8,fp8,0,0.02099040001630783
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,16,1,128,1,float16,float16,0,0.01964000016450882
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,16,1,128,1,float16,fp8,0,0.02052319943904877
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,16,1,128,1,fp8,fp8,0,0.02051520049571991
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,16,2,128,1,float16,float16,0,0.019817599654197694
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,16,2,128,1,float16,fp8,0,0.020644800364971162
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,16,2,128,1,fp8,fp8,0,0.02032800018787384
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,16,4,128,1,float16,float16,0,0.01982560008764267
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,16,4,128,1,float16,fp8,0,0.02059199959039688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,16,4,128,1,fp8,fp8,0,0.020691199600696562
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,16,8,128,1,float16,float16,0,0.020132799446582795
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,16,8,128,1,float16,fp8,0,0.02086720019578934
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,16,8,128,1,fp8,fp8,0,0.020822399854660036
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,16,16,128,1,float16,float16,0,0.019947199523448943
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,16,16,128,1,float16,fp8,0,0.02036159932613373
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,16,16,128,1,fp8,fp8,0,0.020755200088024138
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,16,1,128,1,float16,float16,0,0.01963520050048828
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,16,1,128,1,float16,fp8,0,0.020457600057125092
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,16,1,128,1,fp8,fp8,0,0.02024800032377243
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,16,2,128,1,float16,float16,0,0.019655999541282655
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,16,2,128,1,float16,fp8,0,0.020608000457286835
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,16,2,128,1,fp8,fp8,0,0.020473599433898926
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,16,4,128,1,float16,float16,0,0.019593599438667297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,16,4,128,1,float16,fp8,0,0.02030719965696335
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,16,4,128,1,fp8,fp8,0,0.02062080055475235
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,16,8,128,1,float16,float16,0,0.01964640021324158
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,16,8,128,1,float16,fp8,0,0.020316800475120543
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,16,2,128,1,float16,fp8,0,0.04992479979991913
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,16,8,128,1,fp8,fp8,0,0.020563200116157532
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,16,1,128,1,float16,float16,0,0.04316799938678741
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,16,1,128,1,float16,fp8,0,0.04497919976711273
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,16,1,128,1,fp8,fp8,0,0.04400480091571808
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,16,2,128,1,float16,float16,0,0.05128960013389587
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,16,2,128,1,fp8,fp8,0,0.05140479803085327
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,16,4,128,1,float16,float16,0,0.07724159955978394
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,16,4,128,1,float16,fp8,0,0.07526400089263915
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,16,4,128,1,fp8,fp8,0,0.07570719718933105
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,16,8,128,1,float16,float16,0,0.12681119441986083
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,16,8,128,1,float16,fp8,0,0.13311680555343627
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,16,8,128,1,fp8,fp8,0,0.1312672019004822
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,16,16,128,1,float16,float16,0,0.12180960178375244
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,16,16,128,1,float16,fp8,0,0.12352160215377808
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,16,16,128,1,fp8,fp8,0,0.12282400131225586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,16,1,128,1,float16,float16,0,0.029542401432991028
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,16,1,128,1,float16,fp8,0,0.03110080063343048
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,16,1,128,1,fp8,fp8,0,0.03123359978199005
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,16,2,128,1,float16,float16,0,0.030640000104904176
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,16,2,128,1,float16,fp8,0,0.03239679932594299
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,16,2,128,1,fp8,fp8,0,0.032120001316070554
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,16,4,128,1,float16,float16,0,0.04198879897594452
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,16,4,128,1,float16,fp8,0,0.0456063985824585
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,16,4,128,1,fp8,fp8,0,0.04521119892597199
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,16,8,128,1,float16,float16,0,0.06612799763679504
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,16,8,128,1,float16,fp8,0,0.07012159824371338
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,16,8,128,1,fp8,fp8,0,0.07060480117797852
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,16,16,128,1,float16,float16,0,0.062300801277160645
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,16,16,128,1,float16,fp8,0,0.06903520226478577
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,16,16,128,1,fp8,fp8,0,0.06916639804840088
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,16,1,128,1,float16,float16,0,0.024353599548339842
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,16,1,128,1,float16,fp8,0,0.024747200310230255
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,16,1,128,1,fp8,fp8,0,0.02476319968700409
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,16,2,128,1,float16,float16,0,0.024769599735736846
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,16,2,128,1,float16,fp8,0,0.02507520020008087
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,16,2,128,1,fp8,fp8,0,0.025332799553871153
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,16,4,128,1,float16,float16,0,0.028731200098991393
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,16,4,128,1,float16,fp8,0,0.030551999807357788
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,16,4,128,1,fp8,fp8,0,0.03044799864292145
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,16,8,128,1,float16,float16,0,0.04076800048351288
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,16,8,128,1,float16,fp8,0,0.04396480023860931
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,16,8,128,1,fp8,fp8,0,0.043137601017951964
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,16,16,128,1,float16,float16,0,0.03975360095500946
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,16,16,128,1,float16,fp8,0,0.0428272008895874
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,16,16,128,1,fp8,fp8,0,0.04270879924297333
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,16,1,128,1,float16,float16,0,0.022908799350261688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,16,1,128,1,float16,fp8,0,0.023955200612545014
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,16,1,128,1,fp8,fp8,0,0.023846399784088135
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,16,2,128,1,float16,float16,0,0.023265600204467773
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,16,2,128,1,float16,fp8,0,0.02369280010461807
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,16,2,128,1,fp8,fp8,0,0.023979200422763823
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,16,4,128,1,float16,float16,0,0.02292799949645996
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,16,4,128,1,float16,fp8,0,0.023937599360942842
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,16,4,128,1,fp8,fp8,0,0.023844799399375914
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,16,8,128,1,float16,float16,0,0.02778719961643219
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,16,8,128,1,float16,fp8,0,0.029335999488830568
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,16,8,128,1,fp8,fp8,0,0.029267200827598573
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,16,16,128,1,float16,float16,0,0.026092800498008727
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,16,16,128,1,float16,fp8,0,0.028118398785591126
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,16,16,128,1,fp8,fp8,0,0.028147199749946596
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,16,1,128,1,float16,float16,0,0.02157440036535263
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,16,1,128,1,float16,fp8,0,0.0221903994679451
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,16,1,128,1,fp8,fp8,0,0.022324800491333008
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,16,2,128,1,float16,float16,0,0.02112639993429184
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,16,2,128,1,float16,fp8,0,0.021993599832057953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,16,2,128,1,fp8,fp8,0,0.02244960069656372
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,16,4,128,1,float16,float16,0,0.02152640074491501
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,16,4,128,1,float16,fp8,0,0.022411200404167175
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,16,4,128,1,fp8,fp8,0,0.022023999691009523
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,16,8,128,1,float16,float16,0,0.021694399416446686
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,16,8,128,1,float16,fp8,0,0.022623999416828154
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,16,8,128,1,fp8,fp8,0,0.02228800058364868
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,16,16,128,1,float16,float16,0,0.02091040015220642
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,16,16,128,1,float16,fp8,0,0.021904000639915468
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,16,16,128,1,fp8,fp8,0,0.022012799978256226
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,16,1,128,1,float16,float16,0,0.02035039961338043
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,16,1,128,1,float16,fp8,0,0.021185599267482758
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,16,1,128,1,fp8,fp8,0,0.021027199923992157
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,16,2,128,1,float16,float16,0,0.020428800582885744
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,16,2,128,1,float16,fp8,0,0.020927999913692475
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,16,2,128,1,fp8,fp8,0,0.020921599864959717
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,16,4,128,1,float16,float16,0,0.020579199492931365
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,16,4,128,1,float16,fp8,0,0.021236799657344818
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,16,4,128,1,fp8,fp8,0,0.021272000670433045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,16,8,128,1,float16,float16,0,0.020744000375270844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,16,8,128,1,float16,fp8,0,0.02157440036535263
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,16,8,128,1,fp8,fp8,0,0.021673600375652313
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,16,16,128,1,float16,float16,0,0.02011999934911728
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,16,16,128,1,float16,fp8,0,0.021358400583267212
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,16,16,128,1,fp8,fp8,0,0.0212336003780365
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,16,1,128,1,float16,float16,0,0.019840000569820403
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,16,1,128,1,float16,fp8,0,0.02054080069065094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,16,1,128,1,fp8,fp8,0,0.02030239999294281
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,16,2,128,1,float16,float16,0,0.019971199333667755
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,16,2,128,1,float16,fp8,0,0.02066880017518997
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,16,2,128,1,fp8,fp8,0,0.0205487996339798
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,16,4,128,1,float16,float16,0,0.019649599492549897
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,16,4,128,1,float16,fp8,0,0.020604799687862396
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,16,4,128,1,fp8,fp8,0,0.02053920030593872
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,16,8,128,1,float16,float16,0,0.02022559940814972
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,16,8,128,1,float16,fp8,0,0.020878399908542632
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,16,8,128,1,fp8,fp8,0,0.02101760059595108
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,16,16,128,1,float16,float16,0,0.020401600003242492
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,16,16,128,1,float16,fp8,0,0.020876799523830415
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,16,16,128,1,fp8,fp8,0,0.020803199708461763
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,16,1,128,1,float16,float16,0,0.01959040015935898
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,16,1,128,1,float16,fp8,0,0.020428800582885744
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,16,1,128,1,fp8,fp8,0,0.02033119946718216
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,16,2,128,1,float16,float16,0,0.019603200256824493
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,16,2,128,1,float16,fp8,0,0.020712000131607056
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,16,2,128,1,fp8,fp8,0,0.020510399341583253
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,16,4,128,1,float16,float16,0,0.019438399374485014
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,16,4,128,1,float16,fp8,0,0.020164799690246583
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,16,4,128,1,fp8,fp8,0,0.020390400290489198
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,16,8,128,1,float16,float16,0,0.019788800179958342
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,16,8,128,1,float16,fp8,0,0.020422400534152986
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,16,8,128,1,fp8,fp8,0,0.0204927995800972
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,16,16,128,1,float16,float16,0,0.019607999920845033
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,16,16,128,1,float16,fp8,0,0.02059520035982132
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,16,16,128,1,fp8,fp8,0,0.02011519968509674
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,16,1,128,1,float16,float16,0,0.019331200420856474
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,16,4,128,1,float16,fp8,0,0.019838400185108185
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,16,1,128,1,float16,fp8,0,0.020374399423599244
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,16,1,128,1,fp8,fp8,0,0.020275199413299562
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,16,2,128,1,float16,float16,0,0.019441600143909454
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,16,2,128,1,float16,fp8,0,0.020340800285339355
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,16,2,128,1,fp8,fp8,0,0.020315200090408325
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,16,4,128,1,float16,float16,0,0.01937440037727356
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,16,4,128,1,fp8,fp8,0,0.020326399803161622
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,16,8,128,1,float16,float16,0,0.019300800561904908
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,16,8,128,1,float16,fp8,0,0.020287999510765077
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,16,8,128,1,fp8,fp8,0,0.020310400426387785
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,16,1,128,1,float16,float16,0,0.02984800040721893
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,16,1,128,1,float16,fp8,0,0.031711998581886294
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,16,1,128,1,fp8,fp8,0,0.031876799464225766
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,16,2,128,1,float16,float16,0,0.03718239963054657
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,16,2,128,1,float16,fp8,0,0.03991360068321228
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,16,2,128,1,fp8,fp8,0,0.040387201309204104
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,16,4,128,1,float16,float16,0,0.0522271990776062
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,16,4,128,1,float16,fp8,0,0.058169597387313844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,16,4,128,1,fp8,fp8,0,0.05830240249633789
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,16,8,128,1,float16,float16,0,0.08090400099754333
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,16,8,128,1,float16,fp8,0,0.09409919977188111
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,16,8,128,1,fp8,fp8,0,0.09312160015106201
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,16,16,128,1,float16,float16,0,0.07843199968338013
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,16,16,128,1,float16,fp8,0,0.09200000166893005
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,16,16,128,1,fp8,fp8,0,0.09182720184326172
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,16,1,128,1,float16,float16,0,0.02354400008916855
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,16,1,128,1,float16,fp8,0,0.023848000168800353
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,16,1,128,1,fp8,fp8,0,0.02396000027656555
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,16,2,128,1,float16,float16,0,0.026209598779678343
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,16,2,128,1,float16,fp8,0,0.028439998626708984
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,16,2,128,1,fp8,fp8,0,0.028539198637008666
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,16,4,128,1,float16,float16,0,0.034379199147224426
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,16,4,128,1,float16,fp8,0,0.03747200071811676
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,16,4,128,1,fp8,fp8,0,0.03775359988212586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,16,8,128,1,float16,float16,0,0.04896160066127777
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,16,8,128,1,float16,fp8,0,0.05453280210494995
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,16,8,128,1,fp8,fp8,0,0.055211198329925534
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,16,16,128,1,float16,float16,0,0.047809600830078125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,16,16,128,1,float16,fp8,0,0.05420799851417542
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,16,16,128,1,fp8,fp8,0,0.05437920093536377
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,16,1,128,1,float16,float16,0,0.020732800662517547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,16,1,128,1,float16,fp8,0,0.02189760059118271
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,16,1,128,1,fp8,fp8,0,0.021646399796009064
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,16,2,128,1,float16,float16,0,0.021172800660133363
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,16,2,128,1,float16,fp8,0,0.022230400145053862
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,16,2,128,1,fp8,fp8,0,0.022222399711608887
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,16,4,128,1,float16,float16,0,0.02523840069770813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,16,4,128,1,float16,fp8,0,0.026924800872802735
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,16,4,128,1,fp8,fp8,0,0.026848000288009644
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,16,8,128,1,float16,float16,0,0.032630398869514465
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,16,8,128,1,float16,fp8,0,0.035897600650787356
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,16,8,128,1,fp8,fp8,0,0.035678398609161374
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,16,16,128,1,float16,float16,0,0.03247840106487274
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,16,16,128,1,float16,fp8,0,0.03546720147132874
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,16,16,128,1,fp8,fp8,0,0.03550719916820526
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,16,1,128,1,float16,float16,0,0.02008959949016571
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,16,1,128,1,float16,fp8,0,0.02078240066766739
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,16,1,128,1,fp8,fp8,0,0.020785599946975708
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,16,2,128,1,float16,float16,0,0.020227199792861937
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,16,2,128,1,float16,fp8,0,0.0209647998213768
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,16,2,128,1,fp8,fp8,0,0.021201600134372712
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,16,4,128,1,float16,float16,0,0.020585599541664123
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,16,4,128,1,float16,fp8,0,0.02171040028333664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,16,4,128,1,fp8,fp8,0,0.021587200462818146
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,16,8,128,1,float16,float16,0,0.02459519952535629
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,16,8,128,1,float16,fp8,0,0.02614719867706299
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,16,8,128,1,fp8,fp8,0,0.026081600785255434
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,16,16,128,1,float16,float16,0,0.023852799832820893
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,16,16,128,1,float16,fp8,0,0.025809600949287415
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,16,16,128,1,fp8,fp8,0,0.025753599405288697
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,16,1,128,1,float16,float16,0,0.019393600523471832
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,16,1,128,1,float16,fp8,0,0.020136000216007234
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,16,1,128,1,fp8,fp8,0,0.020491200685501098
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,16,2,128,1,float16,float16,0,0.01972319930791855
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,16,2,128,1,float16,fp8,0,0.020532800257205962
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,16,2,128,1,fp8,fp8,0,0.020318399369716644
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,16,4,128,1,float16,float16,0,0.019838400185108185
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,16,4,128,1,float16,fp8,0,0.02078399956226349
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,16,4,128,1,fp8,fp8,0,0.02111999988555908
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,16,8,128,1,float16,float16,0,0.020265600085258482
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,16,8,128,1,float16,fp8,0,0.021160000562667848
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,16,8,128,1,fp8,fp8,0,0.0212336003780365
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,16,16,128,1,float16,float16,0,0.020207999646663664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,16,16,128,1,float16,fp8,0,0.020734399557113647
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,16,16,128,1,fp8,fp8,0,0.02123199999332428
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,16,1,128,1,float16,float16,0,0.019307200610637665
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,16,1,128,1,float16,fp8,0,0.020051200687885285
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,16,1,128,1,fp8,fp8,0,0.020057600736618043
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,16,2,128,1,float16,float16,0,0.019200000166893005
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,16,2,128,1,float16,fp8,0,0.020291200280189513
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,16,2,128,1,fp8,fp8,0,0.020337599515914916
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,16,4,128,1,float16,float16,0,0.019420799612998963
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,16,4,128,1,float16,fp8,0,0.020521600544452668
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,16,4,128,1,fp8,fp8,0,0.020436799526214598
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,16,8,128,1,float16,float16,0,0.019838400185108185
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,16,8,128,1,float16,fp8,0,0.020734399557113647
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,16,8,128,1,fp8,fp8,0,0.020715199410915375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,16,16,128,1,float16,float16,0,0.01966560035943985
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,16,16,128,1,float16,fp8,0,0.02080159932374954
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,16,16,128,1,fp8,fp8,0,0.02035039961338043
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,16,1,128,1,float16,float16,0,0.018799999356269838
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,16,1,128,1,float16,fp8,0,0.01998240053653717
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,16,1,128,1,fp8,fp8,0,0.019897599518299103
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,16,2,128,1,float16,float16,0,0.019152000546455383
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,16,2,128,1,float16,fp8,0,0.019988800585269927
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,16,2,128,1,fp8,fp8,0,0.019809600710868836
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,16,4,128,1,float16,float16,0,0.01934719979763031
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,16,4,128,1,float16,fp8,0,0.020017600059509276
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,16,4,128,1,fp8,fp8,0,0.02014400064945221
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,16,8,128,1,float16,float16,0,0.019512000679969787
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,16,8,128,1,float16,fp8,0,0.020398400723934174
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,16,8,128,1,fp8,fp8,0,0.020263999700546265
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,16,16,128,1,float16,float16,0,0.01934400051832199
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,16,16,128,1,float16,fp8,0,0.02024960070848465
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,16,16,128,1,fp8,fp8,0,0.020209600031375886
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,16,1,128,1,float16,float16,0,0.017238399386405943
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,16,1,128,1,float16,fp8,0,0.018131199479103088
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,16,1,128,1,fp8,fp8,0,0.018004800379276275
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,16,2,128,1,float16,float16,0,0.01881919950246811
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,16,2,128,1,float16,fp8,0,0.019593599438667297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,16,2,128,1,fp8,fp8,0,0.019627200067043306
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,16,4,128,1,float16,float16,0,0.018980799615383147
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,16,4,128,1,float16,fp8,0,0.01974560022354126
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,16,4,128,1,fp8,fp8,0,0.019833600521087645
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,16,8,128,1,float16,float16,0,0.019208000600337984
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,16,8,128,1,float16,fp8,0,0.02014079988002777
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,16,8,128,1,fp8,fp8,0,0.02001439929008484
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,16,16,128,1,float16,float16,0,0.019228799641132353
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,16,16,128,1,float16,fp8,0,0.019883200526237488
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,16,16,128,1,fp8,fp8,0,0.020131200551986694
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,16,1,128,1,float16,float16,0,0.01634240001440048
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,16,1,128,1,float16,fp8,0,0.017339199781417847
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,16,1,128,1,fp8,fp8,0,0.016998399794101716
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,16,2,128,1,float16,float16,0,0.01727519929409027
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,16,2,128,1,float16,fp8,0,0.017836800217628478
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,16,2,128,1,fp8,fp8,0,0.018033599853515624
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,16,4,128,1,float16,float16,0,0.01884160041809082
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,16,4,128,1,float16,fp8,0,0.019687999784946442
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,16,4,128,1,fp8,fp8,0,0.019636799395084382
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,16,8,128,1,float16,float16,0,0.01897599995136261
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,16,8,128,1,float16,fp8,0,0.019550399482250215
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,16,8,128,1,fp8,fp8,0,0.019470399618148802
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,12,1,128,1,float16,fp8,0,9.620763397216797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,12,1,128,1,fp8,fp8,0,9.692060852050782
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,12,2,128,1,float16,fp8,0,9.808668518066407
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,12,2,128,1,fp8,fp8,0,9.62247543334961
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,12,1,128,1,float16,float16,0,11.650943756103516
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,12,4,128,1,float16,fp8,0,10.074870300292968
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,12,2,128,1,float16,float16,0,11.571006774902344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,12,4,128,1,float16,float16,0,11.835107421875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,12,12,128,1,float16,fp8,0,5.591043090820312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,12,1,128,1,float16,fp8,0,4.814612960815429
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,12,12,128,1,fp8,fp8,0,5.559764862060547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,12,12,128,1,float16,float16,0,6.467393493652343
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,12,1,128,1,float16,float16,0,5.70599365234375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,12,1,128,1,fp8,fp8,0,4.897716903686524
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,12,2,128,1,float16,float16,0,5.7231792449951175
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,12,4,128,1,fp8,fp8,0,10.096382141113281
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,12,12,128,1,float16,float16,0,3.1531919479370116
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,12,2,128,1,float16,fp8,0,4.898831939697265
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,12,2,128,1,fp8,fp8,0,4.893110275268555
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,12,12,128,1,float16,fp8,0,2.891152000427246
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,12,4,128,1,float16,fp8,0,5.0585582733154295
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,12,4,128,1,fp8,fp8,0,5.120232009887696
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,12,4,128,1,float16,float16,0,5.863905715942383
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,12,12,128,1,fp8,fp8,0,2.816948890686035
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,12,1,128,1,float16,float16,0,2.7468095779418946
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,12,1,128,1,float16,fp8,0,2.4135055541992188
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,12,1,128,1,fp8,fp8,0,2.404313659667969
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,12,2,128,1,float16,float16,0,2.6493167877197266
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,12,2,128,1,float16,fp8,0,2.459734344482422
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,12,2,128,1,fp8,fp8,0,2.4667375564575194
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,12,4,128,1,float16,float16,0,2.8639535903930664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,12,12,128,1,float16,float16,0,1.514590358734131
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,12,4,128,1,float16,fp8,0,2.519206428527832
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,12,12,128,1,float16,fp8,0,1.426195240020752
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,12,12,128,1,fp8,fp8,0,1.528711986541748
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,12,4,128,1,fp8,fp8,0,2.6124847412109373
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,12,1,128,1,float16,float16,0,1.3047264099121094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,12,1,128,1,float16,fp8,0,1.2129599571228027
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,12,1,128,1,fp8,fp8,0,1.294825553894043
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,12,2,128,1,float16,float16,0,1.3510704040527344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,12,2,128,1,float16,fp8,0,1.2383343696594238
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,12,2,128,1,fp8,fp8,0,1.2388815879821777
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,12,4,128,1,float16,float16,0,1.3727375984191894
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,12,4,128,1,float16,fp8,0,1.2956432342529296
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,12,4,128,1,fp8,fp8,0,1.4227408409118651
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,12,1,128,1,float16,fp8,0,5.477791976928711
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,12,1,128,1,fp8,fp8,0,5.563967895507813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,12,1,128,1,float16,float16,0,6.54175033569336
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,12,2,128,1,float16,fp8,0,5.614217758178711
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,12,2,128,1,fp8,fp8,0,5.620756912231445
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,12,2,128,1,float16,float16,0,6.671209716796875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,12,4,128,1,float16,fp8,0,5.839524841308593
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,12,4,128,1,float16,float16,0,6.832838439941407
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,12,12,128,1,float16,float16,0,3.8322559356689454
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,12,1,128,1,float16,fp8,0,2.879915237426758
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,12,1,128,1,float16,float16,0,3.186737632751465
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,12,12,128,1,fp8,fp8,0,3.355307388305664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,12,12,128,1,float16,fp8,0,3.54183349609375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,12,1,128,1,fp8,fp8,0,2.755143928527832
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,12,2,128,1,float16,float16,0,3.2693153381347657
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,12,4,128,1,fp8,fp8,0,5.79785270690918
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,12,2,128,1,float16,fp8,0,2.9372976303100584
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,12,2,128,1,fp8,fp8,0,2.809156799316406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,12,12,128,1,float16,float16,0,1.8694944381713867
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,12,12,128,1,float16,fp8,0,1.8070528030395507
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,12,12,128,1,fp8,fp8,0,1.7083599090576171
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,12,4,128,1,float16,fp8,0,2.910246467590332
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,12,4,128,1,float16,float16,0,3.3131519317626954
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,12,4,128,1,fp8,fp8,0,2.914291191101074
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,12,1,128,1,float16,float16,0,1.4873727798461913
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,12,1,128,1,float16,fp8,0,1.411695957183838
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,12,1,128,1,fp8,fp8,0,1.4078736305236816
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,12,2,128,1,float16,float16,0,1.4881471633911132
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,12,2,128,1,float16,fp8,0,1.4170576095581056
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,12,2,128,1,fp8,fp8,0,1.4662240028381348
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,12,4,128,1,float16,float16,0,1.5715968132019043
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,12,12,128,1,float16,float16,0,0.9606639862060546
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,12,4,128,1,float16,fp8,0,1.5580143928527832
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,12,12,128,1,float16,fp8,0,0.908516788482666
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,12,4,128,1,fp8,fp8,0,1.4718288421630858
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,12,12,128,1,fp8,fp8,0,0.8748784065246582
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,12,1,128,1,float16,float16,0,0.7505104064941406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,12,1,128,1,float16,fp8,0,0.7365503787994385
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,12,1,128,1,fp8,fp8,0,0.7080399990081787
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,12,2,128,1,float16,float16,0,0.7702688217163086
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,12,2,128,1,float16,fp8,0,0.7303904056549072
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,12,2,128,1,fp8,fp8,0,0.7252319812774658
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,12,4,128,1,float16,float16,0,0.7925055980682373
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,12,4,128,1,float16,fp8,0,0.747379207611084
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,12,4,128,1,fp8,fp8,0,0.7579840183258056
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,12,1,128,1,float16,fp8,0,3.848129653930664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,12,1,128,1,float16,float16,0,4.453779220581055
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,12,1,128,1,fp8,fp8,0,3.848012924194336
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,12,2,128,1,float16,fp8,0,3.9731822967529298
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,12,2,128,1,fp8,fp8,0,3.9610496520996095
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,12,2,128,1,float16,float16,0,4.64213752746582
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,12,4,128,1,float16,float16,0,4.741691207885742
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,12,4,128,1,float16,fp8,0,4.13310546875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,12,12,128,1,float16,fp8,0,2.464743995666504
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,12,12,128,1,float16,float16,0,2.746883201599121
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,12,1,128,1,float16,float16,0,2.120123291015625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,12,12,128,1,fp8,fp8,0,2.4578943252563477
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,12,1,128,1,float16,fp8,0,2.0097312927246094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,12,4,128,1,fp8,fp8,0,4.13507194519043
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,12,1,128,1,fp8,fp8,0,2.040135955810547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,12,2,128,1,float16,float16,0,2.1855136871337892
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,12,2,128,1,float16,fp8,0,1.9919567108154297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,12,2,128,1,fp8,fp8,0,2.03176326751709
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,12,12,128,1,float16,float16,0,1.355940818786621
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,12,4,128,1,float16,fp8,0,2.0750656127929688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,12,12,128,1,float16,fp8,0,1.2724927902221679
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,12,4,128,1,float16,float16,0,2.359324836730957
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,12,4,128,1,fp8,fp8,0,2.0819440841674806
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,12,1,128,1,float16,float16,0,1.027086353302002
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,12,12,128,1,fp8,fp8,0,1.3681808471679688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,12,1,128,1,float16,fp8,0,0.9879615783691407
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,12,1,128,1,fp8,fp8,0,1.023798370361328
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,12,2,128,1,float16,float16,0,1.0655823707580567
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,12,2,128,1,float16,fp8,0,1.0067104339599608
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,12,2,128,1,fp8,fp8,0,1.0515215873718262
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,12,4,128,1,float16,float16,0,1.1318943977355957
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,12,4,128,1,float16,fp8,0,1.0524000167846679
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,12,12,128,1,float16,float16,0,0.711198377609253
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,12,4,128,1,fp8,fp8,0,1.0851584434509278
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,12,12,128,1,float16,fp8,0,0.6508575916290283
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,12,12,128,1,fp8,fp8,0,0.6803840160369873
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,12,1,128,1,float16,float16,0,0.53373122215271
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,12,1,128,1,float16,fp8,0,0.5117631912231445
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,12,1,128,1,fp8,fp8,0,0.5220304012298584
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,12,2,128,1,float16,float16,0,0.5499343872070312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,12,2,128,1,float16,fp8,0,0.5205440044403076
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,12,2,128,1,fp8,fp8,0,0.519704008102417
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,12,4,128,1,float16,float16,0,0.5736911773681641
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,12,4,128,1,float16,fp8,0,0.546563196182251
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,12,4,128,1,fp8,fp8,0,0.5407423973083496
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,12,1,128,1,float16,fp8,0,5.049617767333984
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,12,1,128,1,fp8,fp8,0,5.029441452026367
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,12,1,128,1,float16,float16,0,5.87928466796875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,12,2,128,1,float16,fp8,0,5.191750335693359
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,12,2,128,1,fp8,fp8,0,5.196940612792969
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,12,2,128,1,float16,float16,0,5.912953567504883
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,12,4,128,1,float16,fp8,0,5.47997932434082
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,12,4,128,1,float16,float16,0,6.305299377441406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,12,12,128,1,float16,float16,0,3.6193695068359375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,12,12,128,1,float16,fp8,0,3.4081329345703124
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,12,1,128,1,float16,fp8,0,2.7446767807006838
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,12,1,128,1,float16,float16,0,2.8463775634765627
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,12,12,128,1,fp8,fp8,0,3.3313488006591796
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,12,1,128,1,fp8,fp8,0,2.5363807678222656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,12,2,128,1,float16,float16,0,2.97490234375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,12,4,128,1,fp8,fp8,0,5.500089645385742
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,12,2,128,1,float16,fp8,0,2.650275230407715
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,12,2,128,1,fp8,fp8,0,2.612435150146484
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,12,12,128,1,float16,float16,0,1.7991024017333985
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,12,12,128,1,float16,fp8,0,1.7349712371826171
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,12,12,128,1,fp8,fp8,0,1.6881792068481445
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,12,4,128,1,float16,float16,0,2.9371728897094727
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,12,4,128,1,float16,fp8,0,2.830936050415039
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,12,4,128,1,fp8,fp8,0,2.750254440307617
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,12,1,128,1,float16,float16,0,1.355673599243164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,12,1,128,1,float16,fp8,0,1.2830927848815918
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,12,1,128,1,fp8,fp8,0,1.3130687713623046
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,12,2,128,1,float16,float16,0,1.4025903701782227
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,12,2,128,1,float16,fp8,0,1.3259599685668946
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,12,2,128,1,fp8,fp8,0,1.4190431594848634
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,12,4,128,1,float16,float16,0,1.4753120422363282
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,12,12,128,1,float16,float16,0,0.8979087829589844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,12,4,128,1,float16,fp8,0,1.3860032081604003
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,12,4,128,1,fp8,fp8,0,1.5065024375915528
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,12,12,128,1,float16,fp8,0,0.8795632362365723
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,12,12,128,1,fp8,fp8,0,0.8568880081176757
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,12,1,128,1,float16,float16,0,0.6871456146240235
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,12,1,128,1,float16,fp8,0,0.69651198387146
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,12,1,128,1,fp8,fp8,0,0.6567952156066894
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,12,2,128,1,float16,float16,0,0.7122591972351074
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,12,2,128,1,float16,fp8,0,0.6856512069702149
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,12,2,128,1,fp8,fp8,0,0.6755616188049316
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,12,4,128,1,float16,float16,0,0.7507520198822022
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,12,4,128,1,float16,fp8,0,0.7092927932739258
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,12,12,128,1,float16,float16,0,0.4613952159881592
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,12,4,128,1,fp8,fp8,0,0.7129151821136475
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,12,12,128,1,float16,fp8,0,0.44256319999694826
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,12,12,128,1,fp8,fp8,0,0.4406320095062256
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,12,1,128,1,float16,float16,0,0.3551232099533081
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,12,1,128,1,float16,fp8,0,0.33738720417022705
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,12,1,128,1,fp8,fp8,0,0.33892800807952883
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,12,2,128,1,float16,float16,0,0.3674799919128418
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,12,2,128,1,float16,fp8,0,0.3452224016189575
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,12,2,128,1,fp8,fp8,0,0.3457904100418091
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,12,4,128,1,float16,float16,0,0.3794624090194702
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,12,4,128,1,float16,fp8,0,0.36545600891113283
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,12,4,128,1,fp8,fp8,0,0.36871678829193116
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,12,1,128,1,float16,float16,0,3.230259323120117
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,12,1,128,1,float16,fp8,0,2.959987258911133
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,12,1,128,1,fp8,fp8,0,2.960927963256836
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,12,2,128,1,float16,fp8,0,3.0548303604125975
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,12,2,128,1,float16,float16,0,3.46212158203125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,12,2,128,1,fp8,fp8,0,3.0606895446777345
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,12,4,128,1,float16,float16,0,3.7134464263916014
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,12,4,128,1,float16,fp8,0,3.2696399688720703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,12,12,128,1,float16,float16,0,2.204011154174805
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,12,12,128,1,float16,fp8,0,2.088356781005859
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,12,1,128,1,float16,float16,0,1.6712495803833007
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,12,1,128,1,float16,fp8,0,1.4887184143066405
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,12,12,128,1,fp8,fp8,0,2.0899648666381836
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,12,4,128,1,fp8,fp8,0,3.3625137329101564
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,12,1,128,1,fp8,fp8,0,1.5563088417053224
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,12,2,128,1,float16,float16,0,1.633683204650879
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,12,2,128,1,float16,fp8,0,1.5624239921569825
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,12,2,128,1,fp8,fp8,0,1.5422927856445312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,12,12,128,1,float16,float16,0,1.1331775665283204
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,12,4,128,1,float16,float16,0,1.7802623748779296
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,12,4,128,1,float16,fp8,0,1.6493663787841797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,12,12,128,1,float16,fp8,0,1.0758560180664063
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,12,4,128,1,fp8,fp8,0,1.644139289855957
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,12,12,128,1,fp8,fp8,0,1.15350399017334
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,12,1,128,1,float16,float16,0,0.7955776214599609
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,12,1,128,1,float16,fp8,0,0.7550015926361084
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,12,1,128,1,fp8,fp8,0,0.756712007522583
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,12,2,128,1,fp8,fp8,0,0.7894207954406738
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,12,2,128,1,float16,float16,0,0.8538064002990723
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,12,2,128,1,float16,fp8,0,0.7851312160491943
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,12,4,128,1,float16,float16,0,0.8709136009216308
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,12,4,128,1,float16,fp8,0,0.8504752159118653
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,12,12,128,1,float16,float16,0,0.5869487762451172
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,12,4,128,1,fp8,fp8,0,0.8546159744262696
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,12,12,128,1,float16,fp8,0,0.5456079959869384
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,12,12,128,1,fp8,fp8,0,0.5482031822204589
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,12,1,128,1,float16,float16,0,0.41777758598327636
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,12,1,128,1,float16,fp8,0,0.39324159622192384
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,12,1,128,1,fp8,fp8,0,0.3898080110549927
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,12,2,128,1,float16,float16,0,0.422379207611084
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,12,2,128,1,float16,fp8,0,0.40639362335205076
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,12,2,128,1,fp8,fp8,0,0.40491681098937987
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,12,4,128,1,float16,float16,0,0.452396821975708
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,12,4,128,1,float16,fp8,0,0.4338655948638916
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,12,4,128,1,fp8,fp8,0,0.43289761543273925
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,12,12,128,1,float16,float16,0,0.31601600646972655
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,12,12,128,1,float16,fp8,0,0.28273439407348633
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,12,12,128,1,fp8,fp8,0,0.28438079357147217
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,12,1,128,1,float16,float16,0,0.2168560028076172
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,12,1,128,1,float16,fp8,0,0.2042288064956665
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,12,1,128,1,fp8,fp8,0,0.20315520763397216
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,12,2,128,1,float16,float16,0,0.2269808053970337
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,12,2,128,1,float16,fp8,0,0.21151359081268312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,12,2,128,1,fp8,fp8,0,0.2087536096572876
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,12,4,128,1,float16,float16,0,0.23801119327545167
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,12,4,128,1,float16,fp8,0,0.22590880393981932
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,12,4,128,1,fp8,fp8,0,0.22692639827728273
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,12,1,128,1,float16,float16,0,2.990412712097168
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,12,1,128,1,float16,fp8,0,2.8196592330932617
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,12,1,128,1,fp8,fp8,0,2.793369674682617
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,12,2,128,1,float16,float16,0,3.2515087127685547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,12,2,128,1,float16,fp8,0,2.9283775329589843
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,12,2,128,1,fp8,fp8,0,2.932940864562988
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,12,4,128,1,float16,float16,0,3.401116943359375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,12,4,128,1,float16,fp8,0,3.2236480712890625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,12,12,128,1,float16,float16,0,2.3178640365600587
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,12,1,128,1,float16,float16,0,1.4755807876586915
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,12,1,128,1,float16,fp8,0,1.4032223701477051
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,12,12,128,1,float16,fp8,0,2.199123191833496
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,12,12,128,1,fp8,fp8,0,2.3279136657714843
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,12,1,128,1,fp8,fp8,0,1.4799375534057617
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,12,4,128,1,fp8,fp8,0,3.26683349609375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,12,2,128,1,float16,float16,0,1.5529295921325683
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,12,2,128,1,float16,fp8,0,1.5100735664367675
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,12,2,128,1,fp8,fp8,0,1.5271727561950683
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,12,4,128,1,float16,float16,0,1.6894800186157226
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,12,4,128,1,float16,fp8,0,1.6170944213867187
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,12,12,128,1,float16,float16,0,1.1628687858581543
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,12,4,128,1,fp8,fp8,0,1.6354383468627929
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,12,12,128,1,fp8,fp8,0,1.1245984077453612
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,12,12,128,1,float16,fp8,0,1.1648176193237305
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,12,1,128,1,float16,float16,0,0.7502575874328613
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,12,1,128,1,float16,fp8,0,0.7247680187225342
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,12,1,128,1,fp8,fp8,0,0.7150735855102539
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,12,2,128,1,float16,float16,0,0.7898079872131347
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,12,2,128,1,float16,fp8,0,0.7591248035430909
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,12,2,128,1,fp8,fp8,0,0.7596720218658447
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,12,4,128,1,float16,float16,0,0.8681695938110352
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,12,4,128,1,float16,fp8,0,0.8283967971801758
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,12,4,128,1,fp8,fp8,0,0.8296992301940918
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,12,12,128,1,float16,float16,0,0.5951807975769043
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,12,12,128,1,float16,fp8,0,0.5738128185272217
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,12,12,128,1,fp8,fp8,0,0.5743264198303223
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,12,1,128,1,float16,float16,0,0.3862704038619995
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,12,1,128,1,float16,fp8,0,0.3770495891571045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,12,2,128,1,fp8,fp8,0,0.3927583932876587
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,12,1,128,1,fp8,fp8,0,0.3735856056213379
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,12,2,128,1,float16,float16,0,0.40290560722351076
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,12,2,128,1,float16,fp8,0,0.3922575950622559
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,12,4,128,1,float16,float16,0,0.44335041046142576
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,12,1,128,1,float16,float16,0,0.2053600072860718
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,12,4,128,1,float16,fp8,0,0.42871999740600586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,12,4,128,1,fp8,fp8,0,0.43061442375183107
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,12,12,128,1,float16,float16,0,0.310153603553772
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,12,12,128,1,float16,fp8,0,0.2996975898742676
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,12,12,128,1,fp8,fp8,0,0.2988368034362793
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,12,1,128,1,float16,fp8,0,0.1966591954231262
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,12,1,128,1,fp8,fp8,0,0.19793920516967772
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,12,2,128,1,float16,float16,0,0.21387999057769774
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,12,2,128,1,float16,fp8,0,0.2073807954788208
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,12,2,128,1,fp8,fp8,0,0.20775680541992186
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,12,4,128,1,float16,float16,0,0.23413119316101075
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,12,4,128,1,float16,fp8,0,0.2268224000930786
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,12,4,128,1,fp8,fp8,0,0.2268752098083496
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,12,12,128,1,float16,float16,0,0.1852720022201538
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,12,12,128,1,float16,fp8,0,0.17538080215454102
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,12,12,128,1,fp8,fp8,0,0.1745695948600769
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,12,1,128,1,float16,float16,0,0.11430079936981201
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,12,1,128,1,float16,fp8,0,0.10853760242462158
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,12,1,128,1,fp8,fp8,0,0.10730400085449218
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,12,2,128,1,float16,float16,0,0.11953120231628418
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,12,2,128,1,float16,fp8,0,0.11354240179061889
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,12,2,128,1,fp8,fp8,0,0.1120303988456726
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,12,4,128,1,float16,float16,0,0.12822240591049194
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,12,4,128,1,float16,fp8,0,0.12106239795684814
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,12,4,128,1,fp8,fp8,0,0.12084800004959106
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,12,1,128,1,float16,float16,0,1.7600847244262696
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,12,1,128,1,float16,fp8,0,1.6958175659179688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,12,1,128,1,fp8,fp8,0,1.6753183364868165
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,12,2,128,1,float16,float16,0,1.8723312377929688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,12,2,128,1,float16,fp8,0,1.7889007568359374
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,12,2,128,1,fp8,fp8,0,1.7845344543457031
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,12,4,128,1,float16,float16,0,2.0915184020996094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,12,4,128,1,float16,fp8,0,2.0203792572021486
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,12,12,128,1,float16,float16,0,1.5129008293151855
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,12,12,128,1,float16,fp8,0,1.4661552429199218
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,12,1,128,1,float16,float16,0,0.8875455856323242
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,12,4,128,1,fp8,fp8,0,2.038817596435547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,12,1,128,1,float16,fp8,0,0.8482447624206543
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,12,12,128,1,fp8,fp8,0,1.4763936042785644
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,12,1,128,1,fp8,fp8,0,0.9134063720703125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,12,2,128,1,float16,float16,0,0.9403599739074707
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,12,2,128,1,float16,fp8,0,0.913326358795166
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,12,2,128,1,fp8,fp8,0,0.9061807632446289
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,12,4,128,1,float16,float16,0,1.062161636352539
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,12,4,128,1,float16,fp8,0,1.0296431541442872
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,12,4,128,1,fp8,fp8,0,1.0168368339538574
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,12,12,128,1,float16,float16,0,0.7771376132965088
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,12,12,128,1,float16,fp8,0,0.7711472034454345
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,12,1,128,1,float16,float16,0,0.45449280738830566
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,12,12,128,1,fp8,fp8,0,0.7542848110198974
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,12,1,128,1,float16,fp8,0,0.4380015850067139
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,12,1,128,1,fp8,fp8,0,0.4388688087463379
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,12,2,128,1,float16,float16,0,0.4884943962097168
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,12,2,128,1,float16,fp8,0,0.46639838218688967
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,12,2,128,1,fp8,fp8,0,0.4690703868865967
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,12,4,128,1,float16,float16,0,0.538374376296997
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,12,4,128,1,float16,fp8,0,0.5210671901702881
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,12,4,128,1,fp8,fp8,0,0.5238848209381104
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,12,12,128,1,float16,float16,0,0.4029871940612793
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,12,12,128,1,float16,fp8,0,0.3966032028198242
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,12,12,128,1,fp8,fp8,0,0.3987855911254883
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,12,1,128,1,float16,float16,0,0.23788321018218994
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,12,1,128,1,float16,fp8,0,0.2331984043121338
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,12,1,128,1,fp8,fp8,0,0.23399360179901124
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,12,2,128,1,float16,float16,0,0.2549792051315308
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,12,2,128,1,float16,fp8,0,0.24722559452056886
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,12,2,128,1,fp8,fp8,0,0.24633278846740722
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,12,4,128,1,float16,float16,0,0.28304319381713866
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,12,4,128,1,float16,fp8,0,0.27614240646362304
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,12,4,128,1,fp8,fp8,0,0.27250080108642577
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,12,12,128,1,float16,float16,0,0.21850080490112306
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,12,12,128,1,float16,fp8,0,0.21754720211029052
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,12,12,128,1,fp8,fp8,0,0.21709918975830078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,12,1,128,1,float16,float16,0,0.12972160577774047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,12,1,128,1,float16,fp8,0,0.12132480144500732
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,12,1,128,1,fp8,fp8,0,0.12135200500488282
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,12,2,128,1,float16,float16,0,0.13742239475250245
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,12,2,128,1,float16,fp8,0,0.131112003326416
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,12,2,128,1,fp8,fp8,0,0.13193279504776
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,12,4,128,1,float16,float16,0,0.15375679731369019
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,12,4,128,1,float16,fp8,0,0.14679839611053466
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,12,4,128,1,fp8,fp8,0,0.1453744053840637
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,12,12,128,1,float16,float16,0,0.1300976037979126
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,12,12,128,1,float16,fp8,0,0.11839200258255005
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,12,12,128,1,fp8,fp8,0,0.11976799964904786
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,12,1,128,1,float16,float16,0,0.07464159727096557
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,12,1,128,1,float16,fp8,0,0.07165600061416626
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,12,1,128,1,fp8,fp8,0,0.07204639911651611
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,12,2,128,1,float16,float16,0,0.07698400020599365
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,12,2,128,1,float16,fp8,0,0.07410719990730286
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,12,2,128,1,fp8,fp8,0,0.07422239780426025
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,12,4,128,1,float16,float16,0,0.08458399772644043
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,12,4,128,1,float16,fp8,0,0.08308799862861634
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,12,4,128,1,fp8,fp8,0,0.08190720081329346
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,12,1,128,1,float16,float16,0,1.7410512924194337
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,12,1,128,1,float16,fp8,0,1.6823295593261718
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,12,1,128,1,fp8,fp8,0,1.670252799987793
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,12,2,128,1,float16,float16,0,1.8771184921264648
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,12,2,128,1,float16,fp8,0,1.8328592300415039
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,12,2,128,1,fp8,fp8,0,1.8242864608764648
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,12,4,128,1,float16,float16,0,2.184584045410156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,12,1,128,1,float16,float16,0,0.8827615737915039
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,12,1,128,1,float16,fp8,0,0.848198413848877
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,12,12,128,1,float16,float16,0,1.7179536819458008
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,12,12,128,1,fp8,fp8,0,1.6957279205322267
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,12,4,128,1,float16,fp8,0,2.206974411010742
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,12,4,128,1,fp8,fp8,0,2.129244804382324
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,12,12,128,1,float16,fp8,0,1.6676319122314454
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,12,1,128,1,fp8,fp8,0,0.8539504051208496
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,12,2,128,1,float16,float16,0,0.974942398071289
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,12,2,128,1,float16,fp8,0,0.9246000289916992
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,12,2,128,1,fp8,fp8,0,0.9270336151123046
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,12,4,128,1,float16,float16,0,1.104206371307373
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,12,4,128,1,float16,fp8,0,1.0841952323913575
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,12,4,128,1,fp8,fp8,0,1.0723360061645508
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,12,12,128,1,float16,float16,0,0.8714480400085449
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,12,1,128,1,fp8,fp8,0,0.43979840278625487
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,12,12,128,1,float16,fp8,0,0.8525247573852539
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,12,1,128,1,float16,float16,0,0.45092320442199707
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,12,1,128,1,float16,fp8,0,0.4384160041809082
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,12,12,128,1,fp8,fp8,0,0.8632752418518066
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,12,2,128,1,float16,float16,0,0.4911632061004639
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,12,2,128,1,float16,fp8,0,0.4749199867248535
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,12,2,128,1,fp8,fp8,0,0.4775792121887207
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,12,4,128,1,float16,float16,0,0.562494421005249
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,12,4,128,1,float16,fp8,0,0.54967360496521
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,12,4,128,1,fp8,fp8,0,0.5486591815948486
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,12,12,128,1,float16,float16,0,0.4570303916931152
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,12,12,128,1,float16,fp8,0,0.446398401260376
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,12,12,128,1,fp8,fp8,0,0.44759359359741213
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,12,1,128,1,float16,float16,0,0.2361936092376709
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,12,1,128,1,float16,fp8,0,0.2332688093185425
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,12,1,128,1,fp8,fp8,0,0.2315984010696411
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,12,2,128,1,float16,float16,0,0.25556321144104005
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,12,2,128,1,float16,fp8,0,0.2509040117263794
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,12,2,128,1,fp8,fp8,0,0.250548791885376
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,12,4,128,1,float16,float16,0,0.29491519927978516
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,12,4,128,1,float16,fp8,0,0.28921918869018554
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,12,4,128,1,fp8,fp8,0,0.2857759952545166
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,12,12,128,1,float16,float16,0,0.2476367950439453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,12,12,128,1,float16,fp8,0,0.24135999679565429
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,12,12,128,1,fp8,fp8,0,0.24150240421295166
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,12,1,128,1,float16,float16,0,0.13139040470123292
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,12,1,128,1,float16,fp8,0,0.12664320468902587
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,12,1,128,1,fp8,fp8,0,0.12647520303726195
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,12,2,128,1,float16,float16,0,0.14038079977035522
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,12,2,128,1,float16,fp8,0,0.13717600107192993
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,12,2,128,1,fp8,fp8,0,0.13691999912261962
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,12,4,128,1,float16,float16,0,0.15813920497894288
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,12,4,128,1,float16,fp8,0,0.1568608045578003
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,12,4,128,1,fp8,fp8,0,0.15728960037231446
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,12,12,128,1,float16,float16,0,0.1374959945678711
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,12,12,128,1,float16,fp8,0,0.1318400025367737
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,12,12,128,1,fp8,fp8,0,0.13086400032043458
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,12,1,128,1,float16,float16,0,0.07184320092201232
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,12,1,128,1,float16,fp8,0,0.06988959908485412
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,12,1,128,1,fp8,fp8,0,0.06984639763832093
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,12,2,128,1,float16,float16,0,0.07693759799003601
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,12,2,128,1,float16,fp8,0,0.07491679787635804
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,12,2,128,1,fp8,fp8,0,0.07470239996910095
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,12,4,128,1,float16,float16,0,0.08920000195503235
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,12,4,128,1,float16,fp8,0,0.08249120116233825
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,12,4,128,1,fp8,fp8,0,0.082150399684906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,12,12,128,1,float16,float16,0,0.07446560263633728
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,12,12,128,1,float16,fp8,0,0.06965759992599488
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,12,12,128,1,fp8,fp8,0,0.06981279850006103
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,12,1,128,1,float16,float16,0,0.046998399496078494
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,12,1,128,1,float16,fp8,0,0.0466592013835907
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,12,1,128,1,fp8,fp8,0,0.0469648003578186
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,12,2,128,1,float16,float16,0,0.04836159944534302
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,12,2,128,1,float16,fp8,0,0.04756160080432892
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,12,2,128,1,fp8,fp8,0,0.047793599963188174
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,12,4,128,1,float16,float16,0,0.0521776020526886
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,12,4,128,1,float16,fp8,0,0.05231199860572815
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,12,4,128,1,fp8,fp8,0,0.052804797887802124
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,12,1,128,1,float16,float16,0,1.0725359916687012
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,12,1,128,1,float16,fp8,0,1.0619359970092774
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,12,1,128,1,fp8,fp8,0,1.0605072021484374
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,12,2,128,1,float16,float16,0,1.1840815544128418
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,12,2,128,1,float16,fp8,0,1.1664560317993165
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,12,2,128,1,fp8,fp8,0,1.1756256103515625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,12,4,128,1,float16,float16,0,1.4105839729309082
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,12,4,128,1,float16,fp8,0,1.390505599975586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,12,12,128,1,float16,float16,0,1.172321605682373
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,12,1,128,1,float16,float16,0,0.5495488166809082
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,12,4,128,1,fp8,fp8,0,1.401905632019043
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,12,1,128,1,float16,fp8,0,0.5452239990234375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,12,12,128,1,float16,fp8,0,1.165339183807373
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,12,12,128,1,fp8,fp8,0,1.1419055938720704
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,12,1,128,1,fp8,fp8,0,0.539569616317749
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,12,2,128,1,float16,float16,0,0.6050367832183838
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,12,2,128,1,float16,fp8,0,0.6000736236572266
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,12,2,128,1,fp8,fp8,0,0.6021471977233886
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,12,4,128,1,float16,float16,0,0.7215392112731933
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,12,4,128,1,float16,fp8,0,0.7166255950927735
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,12,4,128,1,fp8,fp8,0,0.707047986984253
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,12,12,128,1,float16,float16,0,0.6030511856079102
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,12,12,128,1,float16,fp8,0,0.5837791919708252
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,12,1,128,1,float16,float16,0,0.2881983995437622
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,12,1,128,1,float16,fp8,0,0.2832767963409424
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,12,12,128,1,fp8,fp8,0,0.5848911762237549
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,12,4,128,1,float16,float16,0,0.37206718921661375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,12,1,128,1,fp8,fp8,0,0.28459839820861815
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,12,2,128,1,float16,float16,0,0.3193135976791382
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,12,2,128,1,float16,fp8,0,0.31146719455718996
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,12,2,128,1,fp8,fp8,0,0.3107248067855835
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,12,4,128,1,float16,fp8,0,0.3676784038543701
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,12,4,128,1,fp8,fp8,0,0.36681280136108396
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,12,12,128,1,float16,float16,0,0.31747519969940186
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,12,12,128,1,float16,fp8,0,0.3033279895782471
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,12,12,128,1,fp8,fp8,0,0.3029088020324707
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,12,1,128,1,float16,float16,0,0.1574399948120117
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,12,1,128,1,float16,fp8,0,0.15389280319213866
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,12,1,128,1,fp8,fp8,0,0.15416799783706664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,12,2,128,1,float16,float16,0,0.170742404460907
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,12,2,128,1,float16,fp8,0,0.16914720535278321
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,12,2,128,1,fp8,fp8,0,0.16895519495010375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,12,4,128,1,float16,float16,0,0.19945759773254396
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,12,4,128,1,float16,fp8,0,0.19735360145568848
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,12,4,128,1,fp8,fp8,0,0.19769439697265626
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,12,12,128,1,float16,float16,0,0.17146079540252684
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,12,12,128,1,float16,fp8,0,0.16353600025177
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,12,12,128,1,fp8,fp8,0,0.1632815957069397
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,12,1,128,1,float16,float16,0,0.0866752028465271
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,12,1,128,1,float16,fp8,0,0.08234239816665649
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,12,1,128,1,fp8,fp8,0,0.08191840052604675
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,12,2,128,1,float16,float16,0,0.0961296021938324
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,12,2,128,1,float16,fp8,0,0.09137279987335205
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,12,2,128,1,fp8,fp8,0,0.08919680118560791
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,12,4,128,1,float16,float16,0,0.10990240573883056
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,12,4,128,1,float16,fp8,0,0.10631999969482422
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,12,4,128,1,fp8,fp8,0,0.10682239532470703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,12,12,128,1,float16,float16,0,0.10124000310897827
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,12,12,128,1,float16,fp8,0,0.08635519742965699
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,12,12,128,1,fp8,fp8,0,0.08702880144119263
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,12,1,128,1,float16,float16,0,0.052502399682998656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,12,1,128,1,float16,fp8,0,0.050804799795150755
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,12,1,128,1,fp8,fp8,0,0.05095840096473694
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,12,2,128,1,float16,float16,0,0.05469120144844055
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,12,2,128,1,float16,fp8,0,0.05379520058631897
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,12,2,128,1,fp8,fp8,0,0.05353599786758423
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,12,4,128,1,float16,float16,0,0.06134880185127258
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,12,4,128,1,float16,fp8,0,0.060073602199554446
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,12,4,128,1,fp8,fp8,0,0.05994880199432373
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,12,12,128,1,float16,float16,0,0.055257600545883176
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,12,12,128,1,float16,fp8,0,0.055667197704315184
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,12,12,128,1,fp8,fp8,0,0.05581120252609253
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,12,2,128,1,fp8,fp8,0,0.04135519862174988
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,12,1,128,1,float16,float16,0,0.04036319851875305
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,12,1,128,1,float16,fp8,0,0.04023520052433014
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,12,1,128,1,fp8,fp8,0,0.04074240028858185
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,12,2,128,1,float16,float16,0,0.041222399473190306
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,12,2,128,1,float16,fp8,0,0.04143519997596741
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,12,4,128,1,float16,float16,0,0.04359999895095825
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,12,4,128,1,float16,fp8,0,0.043617600202560426
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,12,4,128,1,fp8,fp8,0,0.04362240135669708
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,12,1,128,1,float16,float16,0,1.2855615615844727
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,12,1,128,1,float16,fp8,0,1.2739999771118165
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,12,1,128,1,fp8,fp8,0,1.2723759651184081
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,12,2,128,1,float16,float16,0,1.4317232131958009
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,12,2,128,1,float16,fp8,0,1.4271280288696289
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,12,2,128,1,fp8,fp8,0,1.4220335960388184
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,12,4,128,1,float16,float16,0,1.7294912338256836
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,12,4,128,1,float16,fp8,0,1.7246480941772462
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,12,1,128,1,float16,float16,0,0.6598400115966797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,12,12,128,1,float16,float16,0,1.4751184463500977
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,12,1,128,1,float16,fp8,0,0.6486832141876221
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,12,12,128,1,float16,fp8,0,1.4621071815490723
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,12,4,128,1,fp8,fp8,0,1.7297056198120118
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,12,12,128,1,fp8,fp8,0,1.4660799980163575
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,12,1,128,1,fp8,fp8,0,0.652839994430542
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,12,2,128,1,float16,float16,0,0.7282400131225586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,12,2,128,1,float16,fp8,0,0.7286416053771972
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,12,2,128,1,fp8,fp8,0,0.7277791976928711
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,12,4,128,1,float16,fp8,0,0.8797439575195313
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,12,4,128,1,float16,float16,0,0.8756719589233398
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,12,4,128,1,fp8,fp8,0,0.8804287910461426
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,12,12,128,1,float16,float16,0,0.7520512104034424
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,12,12,128,1,float16,fp8,0,0.7428143978118896
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,12,1,128,1,float16,float16,0,0.34257280826568604
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,12,1,128,1,float16,fp8,0,0.3408495903015137
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,12,12,128,1,fp8,fp8,0,0.7449359893798828
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,12,1,128,1,fp8,fp8,0,0.3388767957687378
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,12,2,128,1,float16,float16,0,0.3797568082809448
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,12,2,128,1,float16,fp8,0,0.37861440181732176
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,12,2,128,1,fp8,fp8,0,0.37635838985443115
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,12,4,128,1,float16,float16,0,0.4548992156982422
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,12,12,128,1,float16,float16,0,0.39088640213012693
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,12,4,128,1,float16,fp8,0,0.45247998237609866
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,12,4,128,1,fp8,fp8,0,0.4525167942047119
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,12,12,128,1,float16,fp8,0,0.386027193069458
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,12,12,128,1,fp8,fp8,0,0.38598079681396485
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,12,1,128,1,float16,float16,0,0.1840224027633667
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,12,1,128,1,float16,fp8,0,0.18491679430007935
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,12,1,128,1,fp8,fp8,0,0.18338240385055543
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,12,2,128,1,float16,float16,0,0.20402240753173828
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,12,2,128,1,float16,fp8,0,0.20350239276885987
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,12,2,128,1,fp8,fp8,0,0.2041167974472046
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,12,4,128,1,float16,float16,0,0.24056639671325683
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,12,4,128,1,float16,fp8,0,0.24045441150665284
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,12,4,128,1,fp8,fp8,0,0.23987679481506347
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,12,12,128,1,float16,float16,0,0.20767199993133545
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,12,12,128,1,float16,fp8,0,0.2062160015106201
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,12,12,128,1,fp8,fp8,0,0.2062448024749756
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,12,1,128,1,float16,float16,0,0.10554879903793335
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,12,1,128,1,float16,fp8,0,0.10473920106887817
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,12,1,128,1,fp8,fp8,0,0.10487200021743774
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,12,2,128,1,float16,float16,0,0.11407840251922607
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,12,2,128,1,float16,fp8,0,0.11352159976959228
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,12,2,128,1,fp8,fp8,0,0.11517119407653809
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,12,4,128,1,float16,float16,0,0.13286880254745484
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,12,4,128,1,float16,fp8,0,0.13390079736709595
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,12,4,128,1,fp8,fp8,0,0.13457759618759155
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,12,12,128,1,float16,float16,0,0.11498080492019654
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,12,12,128,1,float16,fp8,0,0.11237280368804932
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,12,12,128,1,fp8,fp8,0,0.11375199556350708
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,12,1,128,1,float16,float16,0,0.057014399766922
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,12,1,128,1,float16,fp8,0,0.05829120278358459
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,12,1,128,1,fp8,fp8,0,0.05831999778747558
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,12,2,128,1,float16,float16,0,0.0626479983329773
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,12,2,128,1,float16,fp8,0,0.06393759846687316
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,12,2,128,1,fp8,fp8,0,0.06385440230369568
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,12,4,128,1,float16,float16,0,0.07430400252342224
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,12,4,128,1,float16,fp8,0,0.07198240160942078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,12,4,128,1,fp8,fp8,0,0.0717087984085083
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,12,12,128,1,float16,float16,0,0.0620464026927948
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,12,12,128,1,float16,fp8,0,0.06183680295944214
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,12,12,128,1,fp8,fp8,0,0.06181600093841553
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,12,1,128,1,float16,float16,0,0.03817920088768005
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,12,1,128,1,float16,fp8,0,0.039401599764823915
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,12,1,128,1,fp8,fp8,0,0.03943839967250824
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,12,2,128,1,float16,float16,0,0.03909280002117157
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,12,2,128,1,float16,fp8,0,0.041679999232292174
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,12,2,128,1,fp8,fp8,0,0.04196319878101349
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,12,4,128,1,float16,float16,0,0.04364160001277924
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,12,4,128,1,float16,fp8,0,0.04611999988555908
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,12,4,128,1,fp8,fp8,0,0.04640159904956818
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,12,12,128,1,float16,float16,0,0.05612800121307373
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,12,12,128,1,float16,fp8,0,0.0571727991104126
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,12,12,128,1,fp8,fp8,0,0.056888002157211306
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,12,1,128,1,float16,float16,0,0.040252798795700075
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,12,1,128,1,float16,fp8,0,0.04241600036621094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,12,1,128,1,fp8,fp8,0,0.043105599284172055
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,12,2,128,1,float16,float16,0,0.043584001064300534
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,12,2,128,1,float16,fp8,0,0.044791999459266665
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,12,2,128,1,fp8,fp8,0,0.04455040097236633
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,12,4,128,1,float16,float16,0,0.04623039960861206
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,12,4,128,1,float16,fp8,0,0.047502401471138
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,12,4,128,1,fp8,fp8,0,0.04696959853172302
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,12,1,128,1,float16,float16,0,0.8437904357910156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,12,1,128,1,float16,fp8,0,0.8520671844482421
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,12,1,128,1,fp8,fp8,0,0.8576000213623047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,12,2,128,1,float16,float16,0,0.9941807746887207
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,12,2,128,1,float16,fp8,0,1.0107248306274415
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,12,2,128,1,fp8,fp8,0,1.0113679885864257
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,12,4,128,1,float16,fp8,0,1.3071392059326172
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,12,1,128,1,float16,float16,0,0.4348911762237549
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,12,4,128,1,float16,float16,0,1.2904656410217286
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,12,1,128,1,float16,fp8,0,0.4432112216949463
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,12,4,128,1,fp8,fp8,0,1.312547206878662
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,12,12,128,1,float16,fp8,0,1.2550543785095214
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,12,12,128,1,float16,float16,0,1.252676773071289
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,12,12,128,1,fp8,fp8,0,1.2539088249206543
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,12,1,128,1,fp8,fp8,0,0.43978080749511717
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,12,2,128,1,float16,float16,0,0.5109200000762939
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,12,4,128,1,float16,fp8,0,0.6671455860137939
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,12,2,128,1,float16,fp8,0,0.5178127765655518
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,12,2,128,1,fp8,fp8,0,0.5182112216949463
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,12,4,128,1,float16,float16,0,0.6603856086730957
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,12,4,128,1,fp8,fp8,0,0.6650335788726807
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,12,12,128,1,float16,float16,0,0.6425551891326904
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,12,12,128,1,float16,fp8,0,0.6391024112701416
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,12,1,128,1,float16,float16,0,0.23106400966644286
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,12,1,128,1,float16,fp8,0,0.23471839427948
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,12,12,128,1,fp8,fp8,0,0.6401311874389648
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,12,1,128,1,fp8,fp8,0,0.23563520908355712
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,12,2,128,1,float16,float16,0,0.26495039463043213
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,12,2,128,1,float16,fp8,0,0.2718400001525879
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,12,2,128,1,fp8,fp8,0,0.2704560041427612
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,12,4,128,1,float16,float16,0,0.34241600036621095
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,12,4,128,1,float16,fp8,0,0.34533278942108153
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,12,4,128,1,fp8,fp8,0,0.3458240032196045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,12,12,128,1,float16,float16,0,0.33390400409698484
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,12,12,128,1,float16,fp8,0,0.33158559799194337
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,12,2,128,1,float16,float16,0,0.14603040218353272
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,12,1,128,1,float16,float16,0,0.12602880001068115
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,12,12,128,1,fp8,fp8,0,0.33163840770721437
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,12,1,128,1,float16,fp8,0,0.1299823999404907
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,12,1,128,1,fp8,fp8,0,0.1298799991607666
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,12,2,128,1,float16,fp8,0,0.14894239902496337
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,12,2,128,1,fp8,fp8,0,0.14837759733200073
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,12,4,128,1,float16,float16,0,0.1824623942375183
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,12,4,128,1,float16,fp8,0,0.18470560312271117
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,12,4,128,1,fp8,fp8,0,0.18546559810638427
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,12,12,128,1,float16,float16,0,0.1779055953025818
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,12,12,128,1,float16,fp8,0,0.17726399898529052
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,12,12,128,1,fp8,fp8,0,0.17793279886245728
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,12,1,128,1,float16,float16,0,0.07648800015449524
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,12,1,128,1,float16,fp8,0,0.0768671989440918
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,12,1,128,1,fp8,fp8,0,0.07535359859466553
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,12,2,128,1,float16,float16,0,0.08454880118370056
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,12,2,128,1,float16,fp8,0,0.08616480231285095
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,12,2,128,1,fp8,fp8,0,0.08616480231285095
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,12,4,128,1,float16,float16,0,0.10382399559020997
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,12,4,128,1,float16,fp8,0,0.10539040565490723
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,12,4,128,1,fp8,fp8,0,0.10510879755020142
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,12,12,128,1,float16,float16,0,0.10015039443969727
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,12,12,128,1,float16,fp8,0,0.09845600128173829
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,12,12,128,1,fp8,fp8,0,0.09802079796791077
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,12,1,128,1,float16,float16,0,0.042884799838066104
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,12,1,128,1,float16,fp8,0,0.045491200685501096
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,12,1,128,1,fp8,fp8,0,0.045633599162101746
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,12,2,128,1,float16,float16,0,0.04829440116882324
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,12,2,128,1,float16,fp8,0,0.05046399831771851
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,12,2,128,1,fp8,fp8,0,0.050607997179031375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,12,4,128,1,float16,float16,0,0.05976960062980652
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,12,4,128,1,float16,fp8,0,0.058340799808502194
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,12,4,128,1,fp8,fp8,0,0.05896639823913574
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,12,12,128,1,float16,fp8,0,0.05508480072021484
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,12,12,128,1,fp8,fp8,0,0.05504000186920166
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,12,12,128,1,float16,float16,0,0.056657600402832034
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,12,1,128,1,float16,float16,0,0.032204800844192506
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,12,1,128,1,float16,fp8,0,0.03442240059375763
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,12,1,128,1,fp8,fp8,0,0.034308800101280214
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,12,2,128,1,float16,float16,0,0.033564800024032594
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,12,2,128,1,float16,fp8,0,0.035980799794197084
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,12,2,128,1,fp8,fp8,0,0.03551680147647858
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,12,4,128,1,float16,float16,0,0.03810079991817474
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,12,4,128,1,float16,fp8,0,0.03994239866733551
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,12,4,128,1,fp8,fp8,0,0.03989279866218567
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,12,12,128,1,float16,float16,0,0.03720960021018982
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,12,12,128,1,float16,fp8,0,0.03867200016975403
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,12,12,128,1,fp8,fp8,0,0.038859200477600095
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,12,1,128,1,float16,float16,0,0.027561599016189577
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,12,1,128,1,float16,fp8,0,0.02909280061721802
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,12,1,128,1,fp8,fp8,0,0.028697600960731505
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,12,2,128,1,float16,float16,0,0.027735999226570128
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,12,2,128,1,float16,fp8,0,0.029198399186134337
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,12,2,128,1,fp8,fp8,0,0.02953920066356659
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,12,4,128,1,float16,float16,0,0.028729599714279175
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,12,4,128,1,float16,fp8,0,0.03035840094089508
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,12,4,128,1,fp8,fp8,0,0.030782398581504822
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,12,12,128,1,float16,float16,0,0.030353599786758424
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,12,12,128,1,float16,fp8,0,0.032025599479675294
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,12,12,128,1,fp8,fp8,0,0.03206880092620849
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,12,1,128,1,float16,float16,0,0.02688640058040619
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,12,1,128,1,float16,fp8,0,0.028412801027297974
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,12,1,128,1,fp8,fp8,0,0.02871519923210144
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,12,2,128,1,float16,float16,0,0.027291199564933775
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,12,2,128,1,float16,fp8,0,0.02902719974517822
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,12,2,128,1,fp8,fp8,0,0.028395199775695802
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,12,4,128,1,float16,float16,0,0.027297601103782654
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,12,4,128,1,float16,fp8,0,0.028918400406837463
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,12,4,128,1,fp8,fp8,0,0.02871200144290924
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,12,1,128,1,float16,float16,0,0.365011191368103
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,12,2,128,1,float16,fp8,0,0.45320959091186525
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,12,1,128,1,float16,fp8,0,0.378385591506958
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,12,1,128,1,fp8,fp8,0,0.3795775890350342
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,12,2,128,1,float16,float16,0,0.44073758125305174
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,12,2,128,1,fp8,fp8,0,0.4541759967803955
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,12,4,128,1,float16,float16,0,0.5894815921783447
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,12,4,128,1,float16,fp8,0,0.6030335903167725
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,12,4,128,1,fp8,fp8,0,0.6052432060241699
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,12,12,128,1,float16,float16,0,0.6065887928009033
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,12,1,128,1,fp8,fp8,0,0.20312960147857667
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,12,1,128,1,float16,float16,0,0.19636000394821168
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,12,12,128,1,float16,fp8,0,0.5970799922943115
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,12,1,128,1,float16,fp8,0,0.20261120796203613
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,12,12,128,1,fp8,fp8,0,0.5974624156951904
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,12,2,128,1,float16,float16,0,0.23117280006408691
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,12,2,128,1,float16,fp8,0,0.23934559822082518
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,12,2,128,1,fp8,fp8,0,0.2385632038116455
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,12,4,128,1,float16,float16,0,0.3070240020751953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,12,4,128,1,float16,fp8,0,0.314683198928833
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,12,4,128,1,fp8,fp8,0,0.31378560066223143
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,12,12,128,1,float16,float16,0,0.3158207893371582
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,12,12,128,1,float16,fp8,0,0.3088128089904785
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,12,1,128,1,float16,float16,0,0.1098479986190796
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,12,12,128,1,fp8,fp8,0,0.3097088098526001
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,12,1,128,1,float16,fp8,0,0.11375999450683594
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,12,1,128,1,fp8,fp8,0,0.11422560214996338
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,12,2,128,1,float16,float16,0,0.12868000268936158
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,12,2,128,1,float16,fp8,0,0.1325536012649536
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,12,2,128,1,fp8,fp8,0,0.13207999467849732
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,12,4,128,1,float16,float16,0,0.16500320434570312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,12,4,128,1,float16,fp8,0,0.16895359754562378
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,12,4,128,1,fp8,fp8,0,0.16920640468597412
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,12,12,128,1,float16,float16,0,0.16993759870529174
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,12,12,128,1,float16,fp8,0,0.16632479429244995
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,12,12,128,1,fp8,fp8,0,0.16653120517730713
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,12,1,128,1,float16,float16,0,0.06764000058174133
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,12,1,128,1,float16,fp8,0,0.06740480065345764
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,12,1,128,1,fp8,fp8,0,0.06753119826316833
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,12,2,128,1,float16,float16,0,0.07571039795875549
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,12,2,128,1,float16,fp8,0,0.07778400182723999
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,12,2,128,1,fp8,fp8,0,0.07687519788742066
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,12,4,128,1,float16,float16,0,0.09511839747428893
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,12,4,128,1,float16,fp8,0,0.09588000178337097
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,12,4,128,1,fp8,fp8,0,0.09640160202980042
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,12,12,128,1,float16,float16,0,0.09569759964942932
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,12,12,128,1,float16,fp8,0,0.09069920182228089
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,12,12,128,1,fp8,fp8,0,0.0920960009098053
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,12,1,128,1,float16,float16,0,0.03827039897441864
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,12,1,128,1,float16,fp8,0,0.03926720023155213
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,12,1,128,1,fp8,fp8,0,0.039192000031471254
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,12,2,128,1,float16,float16,0,0.04412800073623657
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,12,2,128,1,float16,fp8,0,0.044491198658943173
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,12,2,128,1,fp8,fp8,0,0.04391199946403503
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,12,4,128,1,float16,float16,0,0.05487040281295776
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,12,4,128,1,float16,fp8,0,0.05291360020637512
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,12,4,128,1,fp8,fp8,0,0.05267840027809143
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,12,12,128,1,float16,float16,0,0.053091198205947876
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,12,12,128,1,float16,fp8,0,0.05165119767189026
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,12,12,128,1,fp8,fp8,0,0.051235198974609375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,12,1,128,1,float16,float16,0,0.029047998785972595
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,12,1,128,1,float16,fp8,0,0.031164801120758055
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,12,1,128,1,fp8,fp8,0,0.03094240128993988
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,12,2,128,1,float16,float16,0,0.030144000053405763
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,12,2,128,1,float16,fp8,0,0.03210720121860504
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,12,2,128,1,fp8,fp8,0,0.03209120035171509
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,12,4,128,1,float16,float16,0,0.03443840146064758
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,12,4,128,1,float16,fp8,0,0.03627679944038391
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,12,4,128,1,fp8,fp8,0,0.03670719861984253
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,12,12,128,1,float16,float16,0,0.034092798829078674
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,12,12,128,1,float16,fp8,0,0.034841600060462954
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,12,12,128,1,fp8,fp8,0,0.03506079912185669
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,12,1,128,1,float16,float16,0,0.024294400215148927
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,12,1,128,1,float16,fp8,0,0.025489598512649536
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,12,1,128,1,fp8,fp8,0,0.025441598892211915
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,12,2,128,1,float16,float16,0,0.02462400048971176
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,12,2,128,1,float16,fp8,0,0.025619199872016905
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,12,2,128,1,fp8,fp8,0,0.025761601328849793
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,12,4,128,1,float16,float16,0,0.025923201441764833
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,12,4,128,1,float16,fp8,0,0.026697599887847902
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,12,4,128,1,fp8,fp8,0,0.026502400636672974
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,12,12,128,1,float16,float16,0,0.02744320034980774
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,12,12,128,1,float16,fp8,0,0.028369599580764772
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,12,12,128,1,fp8,fp8,0,0.02831520140171051
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,12,1,128,1,float16,float16,0,0.024035200476646423
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,12,1,128,1,float16,fp8,0,0.025360000133514405
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,12,1,128,1,fp8,fp8,0,0.02481440007686615
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,12,2,128,1,float16,float16,0,0.023972800374031066
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,12,2,128,1,float16,fp8,0,0.025391998887062072
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,12,2,128,1,fp8,fp8,0,0.02502720057964325
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,12,4,128,1,float16,float16,0,0.02428639978170395
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,12,4,128,1,float16,fp8,0,0.025409600138664244
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,12,4,128,1,fp8,fp8,0,0.025380799174308778
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,12,12,128,1,float16,float16,0,0.023632000386714935
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,12,12,128,1,float16,fp8,0,0.02462719976902008
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,12,12,128,1,fp8,fp8,0,0.02483679950237274
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,12,1,128,1,float16,float16,0,0.022609600424766542
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,12,1,128,1,float16,fp8,0,0.02345760017633438
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,12,1,128,1,fp8,fp8,0,0.023721599578857423
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,12,2,128,1,float16,float16,0,0.022603200376033784
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,12,2,128,1,float16,fp8,0,0.02393600046634674
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,12,2,128,1,fp8,fp8,0,0.02381120026111603
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,12,1,128,1,fp8,fp8,0,0.196124804019928
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,12,4,128,1,float16,float16,0,0.022886399924755097
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,12,4,128,1,float16,fp8,0,0.0238864004611969
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,12,2,128,1,float16,fp8,0,0.23426239490509032
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,12,4,128,1,fp8,fp8,0,0.023982399702072145
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,12,1,128,1,float16,float16,0,0.18633919954299927
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,12,1,128,1,float16,fp8,0,0.1964975953102112
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,12,2,128,1,float16,float16,0,0.2215183973312378
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,12,2,128,1,fp8,fp8,0,0.23322720527648927
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,12,4,128,1,float16,float16,0,0.2973184108734131
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,12,4,128,1,float16,fp8,0,0.30856800079345703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,12,4,128,1,fp8,fp8,0,0.30781600475311277
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,12,12,128,1,float16,float16,0,0.31562879085540774
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,12,12,128,1,float16,fp8,0,0.3080415964126587
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,12,12,128,1,fp8,fp8,0,0.30770559310913087
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,12,1,128,1,float16,float16,0,0.10599520206451415
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,12,1,128,1,float16,fp8,0,0.10902559757232666
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,12,1,128,1,fp8,fp8,0,0.10926079750061035
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,12,2,128,1,float16,float16,0,0.12456640005111694
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,12,2,128,1,float16,fp8,0,0.12803200483322144
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,12,2,128,1,fp8,fp8,0,0.12738399505615233
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,12,4,128,1,float16,float16,0,0.16027519702911378
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,12,4,128,1,float16,fp8,0,0.1636639952659607
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,12,4,128,1,fp8,fp8,0,0.16354720592498778
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,12,12,128,1,float16,float16,0,0.16894079446792604
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,12,12,128,1,float16,fp8,0,0.16301120519638063
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,12,12,128,1,fp8,fp8,0,0.16311359405517578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,12,1,128,1,float16,float16,0,0.06516479849815368
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,12,1,128,1,float16,fp8,0,0.06279199719429016
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,12,1,128,1,fp8,fp8,0,0.0635312020778656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,12,2,128,1,float16,float16,0,0.07303040027618408
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,12,2,128,1,float16,fp8,0,0.07277600169181823
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,12,2,128,1,fp8,fp8,0,0.07292960286140442
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,12,4,128,1,float16,float16,0,0.09240959882736206
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,12,4,128,1,float16,fp8,0,0.09201440215110779
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,12,4,128,1,fp8,fp8,0,0.09197919964790344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,12,12,128,1,float16,float16,0,0.09348959922790527
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,12,12,128,1,float16,fp8,0,0.08576160073280334
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,12,12,128,1,fp8,fp8,0,0.0867680013179779
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,12,1,128,1,float16,float16,0,0.03439840078353882
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,12,1,128,1,float16,fp8,0,0.035750401020050046
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,12,1,128,1,fp8,fp8,0,0.0355648010969162
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,12,4,128,1,float16,fp8,0,0.047793599963188174
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,12,2,128,1,float16,float16,0,0.04071199893951416
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,12,2,128,1,float16,fp8,0,0.040454399585723874
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,12,2,128,1,fp8,fp8,0,0.04008800089359284
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,12,4,128,1,float16,float16,0,0.05129920244216919
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,12,4,128,1,fp8,fp8,0,0.048188799619674684
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,12,1,128,1,fp8,fp8,0,0.02884640097618103
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,12,12,128,1,float16,float16,0,0.05163840055465698
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,12,12,128,1,float16,fp8,0,0.049377599358558656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,12,12,128,1,fp8,fp8,0,0.049275198578834535
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,12,1,128,1,float16,float16,0,0.027663999795913698
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,12,1,128,1,float16,fp8,0,0.02871040105819702
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,12,2,128,1,float16,float16,0,0.028641599416732787
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,12,2,128,1,float16,fp8,0,0.029814401268959047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,12,2,128,1,fp8,fp8,0,0.029764801263809204
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,12,4,128,1,float16,float16,0,0.03297280073165894
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,12,4,128,1,float16,fp8,0,0.034625598788261415
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,12,4,128,1,fp8,fp8,0,0.034230399131774905
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,12,12,128,1,float16,float16,0,0.03249120116233826
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,12,12,128,1,float16,fp8,0,0.03284800052642822
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,12,12,128,1,fp8,fp8,0,0.03301919996738434
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,12,1,128,1,float16,float16,0,0.022892799973487855
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,12,1,128,1,float16,fp8,0,0.023764799535274505
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,12,1,128,1,fp8,fp8,0,0.023745599389076232
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,12,2,128,1,float16,float16,0,0.02333440035581589
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,12,2,128,1,float16,fp8,0,0.024086399376392363
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,12,2,128,1,fp8,fp8,0,0.023830400407314302
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,12,4,128,1,float16,float16,0,0.02401600033044815
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,12,4,128,1,float16,fp8,0,0.02462079972028732
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,12,4,128,1,fp8,fp8,0,0.025089600682258607
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,12,12,128,1,float16,float16,0,0.025971201062202454
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,12,2,128,1,float16,float16,0,0.022644799947738648
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,12,12,128,1,float16,fp8,0,0.02653760015964508
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,12,12,128,1,fp8,fp8,0,0.026700800657272337
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,12,1,128,1,float16,float16,0,0.02239840030670166
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,12,1,128,1,float16,fp8,0,0.023027199506759643
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,12,1,128,1,fp8,fp8,0,0.022996799647808076
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,12,2,128,1,float16,fp8,0,0.023105600476264955
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,12,2,128,1,fp8,fp8,0,0.02303680032491684
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,12,4,128,1,float16,float16,0,0.022844800353050233
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,12,4,128,1,float16,fp8,0,0.02375999987125397
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,12,4,128,1,fp8,fp8,0,0.02375999987125397
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,12,12,128,1,float16,float16,0,0.0221903994679451
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,12,12,128,1,float16,fp8,0,0.023081600666046143
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,12,12,128,1,fp8,fp8,0,0.022567999362945557
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,12,1,128,1,float16,float16,0,0.021217599511146545
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,12,1,128,1,float16,fp8,0,0.021704000234603883
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,12,1,128,1,fp8,fp8,0,0.021740800142288207
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,12,2,128,1,float16,float16,0,0.021222400665283202
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,12,2,128,1,float16,fp8,0,0.022115199267864226
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,12,2,128,1,fp8,fp8,0,0.022155199944972993
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,12,4,128,1,float16,float16,0,0.021294400095939636
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,12,4,128,1,float16,fp8,0,0.021937599778175353
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,12,4,128,1,fp8,fp8,0,0.02221119999885559
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,12,12,128,1,float16,float16,0,0.020571200549602507
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,12,12,128,1,float16,fp8,0,0.02122880071401596
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,12,12,128,1,fp8,fp8,0,0.021012799441814424
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,12,1,128,1,float16,float16,0,0.020049600303173064
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,12,1,128,1,float16,fp8,0,0.021036800742149354
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,12,1,128,1,fp8,fp8,0,0.020977599918842314
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,12,2,128,1,float16,float16,0,0.02003519982099533
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,12,2,128,1,float16,fp8,0,0.020960000157356263
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,12,2,128,1,fp8,fp8,0,0.020908799767494202
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,12,1,128,1,fp8,fp8,0,0.10981119871139526
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,12,4,128,1,float16,float16,0,0.02012320011854172
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,12,4,128,1,float16,fp8,0,0.02096640020608902
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,12,4,128,1,fp8,fp8,0,0.021185599267482758
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,12,1,128,1,float16,float16,0,0.10528800487518311
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,12,1,128,1,float16,fp8,0,0.11023839712142944
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,12,2,128,1,float16,float16,0,0.12445119619369507
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,12,2,128,1,float16,fp8,0,0.1286255955696106
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,12,2,128,1,fp8,fp8,0,0.12854399681091308
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,12,4,128,1,float16,float16,0,0.17585279941558837
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,12,4,128,1,float16,fp8,0,0.18076640367507935
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,12,4,128,1,fp8,fp8,0,0.18117280006408693
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,12,12,128,1,float16,float16,0,0.19055839776992797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,12,12,128,1,float16,fp8,0,0.1900496006011963
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,12,12,128,1,fp8,fp8,0,0.19012320041656494
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,12,1,128,1,float16,float16,0,0.06500160098075866
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,12,1,128,1,float16,fp8,0,0.06377599835395813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,12,1,128,1,fp8,fp8,0,0.06326559782028199
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,12,2,128,1,float16,float16,0,0.0736944019794464
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,12,2,128,1,float16,fp8,0,0.07233440279960632
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,12,2,128,1,fp8,fp8,0,0.07322720289230347
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,12,4,128,1,float16,float16,0,0.10025600194931031
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,12,4,128,1,float16,fp8,0,0.10065599679946899
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,12,4,128,1,fp8,fp8,0,0.10072959661483764
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,12,12,128,1,float16,float16,0,0.10550240278244019
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,12,12,128,1,float16,fp8,0,0.1010975956916809
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,12,12,128,1,fp8,fp8,0,0.10003360509872436
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,12,1,128,1,float16,float16,0,0.034230399131774905
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,12,1,128,1,float16,fp8,0,0.03612160086631775
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,12,1,128,1,fp8,fp8,0,0.03587839901447296
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,12,2,128,1,float16,float16,0,0.04101119935512543
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,12,2,128,1,float16,fp8,0,0.040715199708938596
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,12,12,128,1,float16,fp8,0,0.05365440249443054
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,12,2,128,1,fp8,fp8,0,0.0408048003911972
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,12,4,128,1,float16,float16,0,0.054915201663970944
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,12,4,128,1,float16,fp8,0,0.05236319899559021
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,12,4,128,1,fp8,fp8,0,0.052288001775741576
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,12,12,128,1,float16,float16,0,0.05456799864768982
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,12,12,128,1,fp8,fp8,0,0.05407040119171143
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,12,1,128,1,float16,float16,0,0.02794080078601837
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,12,1,128,1,float16,fp8,0,0.029193601012229918
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,12,1,128,1,fp8,fp8,0,0.02918879985809326
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,12,2,128,1,float16,float16,0,0.02876160144805908
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,12,2,128,1,float16,fp8,0,0.030257600545883178
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,12,2,128,1,fp8,fp8,0,0.030115199089050294
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,12,4,128,1,float16,float16,0,0.03291040062904358
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,12,4,128,1,float16,fp8,0,0.03443360030651092
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,12,4,128,1,fp8,fp8,0,0.03431519865989685
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,12,12,128,1,float16,float16,0,0.035872000455856326
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,12,12,128,1,float16,fp8,0,0.03744480013847351
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,12,12,128,1,fp8,fp8,0,0.03731040060520172
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,12,1,128,1,float16,float16,0,0.022643199563026427
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,12,1,128,1,float16,fp8,0,0.02377600073814392
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,12,1,128,1,fp8,fp8,0,0.023473599553108217
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,12,4,128,1,fp8,fp8,0,0.02489120066165924
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,12,2,128,1,float16,float16,0,0.02319840043783188
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,12,2,128,1,float16,fp8,0,0.024051199853420257
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,12,2,128,1,fp8,fp8,0,0.02359199970960617
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,12,4,128,1,float16,float16,0,0.024028800427913666
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,12,4,128,1,float16,fp8,0,0.024857600033283234
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,12,12,128,1,float16,float16,0,0.025577598810195924
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,12,12,128,1,float16,fp8,0,0.026107200980186464
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,12,12,128,1,fp8,fp8,0,0.026689600944519044
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,12,1,128,1,float16,float16,0,0.022473600506782532
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,12,1,128,1,float16,fp8,0,0.023235200345516203
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,12,1,128,1,fp8,fp8,0,0.023343999683856965
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,12,2,128,1,float16,float16,0,0.022225600481033326
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,12,2,128,1,float16,fp8,0,0.02343519926071167
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,12,2,128,1,fp8,fp8,0,0.0232464000582695
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,12,4,128,1,float16,float16,0,0.022759999334812164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,12,4,128,1,float16,fp8,0,0.023500800132751465
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,12,4,128,1,fp8,fp8,0,0.023414400219917298
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,12,12,128,1,float16,float16,0,0.021848000586032867
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,12,12,128,1,float16,fp8,0,0.022489599883556366
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,12,12,128,1,fp8,fp8,0,0.02295839935541153
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,12,1,128,1,float16,float16,0,0.02109760046005249
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,12,1,128,1,float16,fp8,0,0.021873599290847777
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,12,1,128,1,fp8,fp8,0,0.021712000668048858
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,12,2,128,1,float16,float16,0,0.021198399364948273
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,12,2,128,1,float16,fp8,0,0.022009600698947907
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,12,2,128,1,fp8,fp8,0,0.021937599778175353
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,12,4,128,1,float16,float16,0,0.021040000021457672
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,12,4,128,1,float16,fp8,0,0.022041599452495574
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,12,4,128,1,fp8,fp8,0,0.02208160012960434
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,12,12,128,1,float16,float16,0,0.020467199385166168
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,12,12,128,1,float16,fp8,0,0.021358400583267212
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,12,12,128,1,fp8,fp8,0,0.021447999775409697
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,12,1,128,1,float16,float16,0,0.019966399669647215
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,12,1,128,1,float16,fp8,0,0.02096640020608902
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,12,1,128,1,fp8,fp8,0,0.02094080001115799
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,12,2,128,1,float16,float16,0,0.019964799284934998
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,12,2,128,1,float16,fp8,0,0.020772799849510193
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,12,2,128,1,fp8,fp8,0,0.020923200249671935
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,12,4,128,1,float16,float16,0,0.020214399695396422
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,12,4,128,1,float16,fp8,0,0.021048000454902648
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,12,4,128,1,fp8,fp8,0,0.021084800362586975
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,12,12,128,1,float16,float16,0,0.02000479996204376
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,12,12,128,1,float16,fp8,0,0.020635199546813966
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,12,12,128,1,fp8,fp8,0,0.020667199790477753
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,12,1,128,1,float16,float16,0,0.019620800018310548
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,12,1,128,1,float16,fp8,0,0.02062560021877289
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,12,1,128,1,fp8,fp8,0,0.020270399749279022
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,12,2,128,1,float16,float16,0,0.01964640021324158
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,12,2,128,1,float16,fp8,0,0.02024960070848465
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,12,2,128,1,fp8,fp8,0,0.020403200387954713
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,12,4,128,1,float16,float16,0,0.01950239986181259
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,12,4,128,1,float16,fp8,0,0.020576000213623047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,12,4,128,1,fp8,fp8,0,0.020587199926376344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,12,1,128,1,float16,float16,0,0.06630880236625672
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,12,1,128,1,float16,fp8,0,0.06393120288848878
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,12,1,128,1,fp8,fp8,0,0.06534240245819092
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,12,2,128,1,float16,float16,0,0.08131999969482422
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,12,2,128,1,float16,fp8,0,0.08302720189094544
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,12,2,128,1,fp8,fp8,0,0.08297920227050781
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,12,12,128,1,float16,fp8,0,0.12921119928359986
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,12,4,128,1,float16,float16,0,0.10060640573501586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,12,4,128,1,float16,fp8,0,0.10255039930343628
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,12,4,128,1,fp8,fp8,0,0.1022752046585083
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,12,12,128,1,float16,float16,0,0.12781440019607543
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,12,12,128,1,fp8,fp8,0,0.12796640396118164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,12,1,128,1,float16,float16,0,0.035076799988746646
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,12,1,128,1,float16,fp8,0,0.03711999952793121
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,12,1,128,1,fp8,fp8,0,0.03734720051288605
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,12,2,128,1,float16,float16,0,0.04477120041847229
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,12,2,128,1,float16,fp8,0,0.04580479860305786
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,12,2,128,1,fp8,fp8,0,0.04623839855194092
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,12,4,128,1,float16,float16,0,0.05526559948921204
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,12,4,128,1,float16,fp8,0,0.05360159873962402
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,12,4,128,1,fp8,fp8,0,0.053307199478149415
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,12,12,128,1,float16,float16,0,0.06734240055084229
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,12,12,128,1,float16,fp8,0,0.06776800155639648
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,12,12,128,1,fp8,fp8,0,0.06724640130996704
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,12,1,128,1,float16,float16,0,0.02834399938583374
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,12,1,128,1,float16,fp8,0,0.029798400402069092
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,12,1,128,1,fp8,fp8,0,0.029865598678588866
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,12,2,128,1,float16,float16,0,0.029108801484107973
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,12,2,128,1,float16,fp8,0,0.030959999561309813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,12,2,128,1,fp8,fp8,0,0.030729600787162782
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,12,4,128,1,float16,float16,0,0.033078399300575254
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,12,4,128,1,float16,fp8,0,0.035067200660705566
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,12,4,128,1,fp8,fp8,0,0.034990400075912476
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,12,12,128,1,float16,float16,0,0.040092799067497256
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,12,12,128,1,float16,fp8,0,0.04210399985313416
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,12,12,128,1,fp8,fp8,0,0.04223200082778931
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,12,1,128,1,float16,float16,0,0.023391999304294586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,12,1,128,1,float16,fp8,0,0.024142399430274963
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,12,1,128,1,fp8,fp8,0,0.023612800240516662
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,12,2,128,1,float16,float16,0,0.023307199776172637
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,12,2,128,1,float16,fp8,0,0.02420479953289032
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,12,2,128,1,fp8,fp8,0,0.024323199689388276
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,12,4,128,1,float16,float16,0,0.0242576003074646
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,12,4,128,1,float16,fp8,0,0.025193598866462708
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,12,4,128,1,fp8,fp8,0,0.025174400210380553
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,12,12,128,1,float16,float16,0,0.029187199473381043
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,12,12,128,1,float16,fp8,0,0.03062080144882202
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,12,1,128,1,float16,float16,0,0.02237759977579117
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,12,12,128,1,fp8,fp8,0,0.030888000130653383
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,12,1,128,1,float16,fp8,0,0.023134399950504304
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,12,1,128,1,fp8,fp8,0,0.023319999873638152
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,12,2,128,1,float16,float16,0,0.0226623997092247
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,12,2,128,1,float16,fp8,0,0.023175999522209167
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,12,2,128,1,fp8,fp8,0,0.023235200345516203
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,12,4,128,1,float16,float16,0,0.02239679992198944
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,12,4,128,1,float16,fp8,0,0.023145599663257597
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,12,4,128,1,fp8,fp8,0,0.02364480048418045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,12,12,128,1,float16,float16,0,0.022009600698947907
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,12,12,128,1,float16,fp8,0,0.022592000663280487
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,12,12,128,1,fp8,fp8,0,0.022870400547981264
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,12,1,128,1,float16,float16,0,0.021096000075340272
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,12,1,128,1,float16,fp8,0,0.02218720018863678
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,12,1,128,1,fp8,fp8,0,0.021910400688648225
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,12,2,128,1,float16,float16,0,0.02112800031900406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,12,12,128,1,float16,fp8,0,0.021422399580478667
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,12,2,128,1,float16,fp8,0,0.022094400227069856
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,12,2,128,1,fp8,fp8,0,0.021902400255203246
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,12,4,128,1,float16,float16,0,0.021012799441814424
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,12,4,128,1,float16,fp8,0,0.021958400309085847
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,12,4,128,1,fp8,fp8,0,0.022064000368118286
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,12,12,128,1,float16,float16,0,0.020745599269866945
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,12,12,128,1,fp8,fp8,0,0.021508799493312837
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,12,1,128,1,float16,float16,0,0.020076799392700195
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,12,1,128,1,float16,fp8,0,0.02070239931344986
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,12,1,128,1,fp8,fp8,0,0.020923200249671935
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,12,12,128,1,float16,float16,0,0.019812799990177155
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,12,2,128,1,float16,float16,0,0.020147199928760528
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,12,2,128,1,float16,fp8,0,0.02069920003414154
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,12,2,128,1,fp8,fp8,0,0.02114879935979843
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,12,4,128,1,float16,float16,0,0.020275199413299562
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,12,4,128,1,float16,fp8,0,0.02106879949569702
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,12,4,128,1,fp8,fp8,0,0.021001599729061127
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,12,12,128,1,float16,fp8,0,0.021006399393081666
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,12,12,128,1,fp8,fp8,0,0.02080000042915344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,12,1,128,1,float16,float16,0,0.01956319957971573
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,12,1,128,1,float16,fp8,0,0.02030719965696335
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,12,1,128,1,fp8,fp8,0,0.020689600706100465
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,12,2,128,1,float16,float16,0,0.019750399887561797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,12,2,128,1,float16,fp8,0,0.020531199872493744
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,12,2,128,1,fp8,fp8,0,0.02041279971599579
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,12,4,128,1,float16,float16,0,0.019728000462055206
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,12,4,128,1,float16,fp8,0,0.02027679979801178
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,12,4,128,1,fp8,fp8,0,0.020403200387954713
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,12,12,128,1,float16,float16,0,0.019577600061893463
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,12,12,128,1,float16,fp8,0,0.020742399990558623
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,12,12,128,1,fp8,fp8,0,0.020633600652217865
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,12,1,128,1,float16,float16,0,0.01950719952583313
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,12,1,128,1,float16,fp8,0,0.02022400051355362
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,12,1,128,1,fp8,fp8,0,0.020395199954509734
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,12,2,128,1,float16,float16,0,0.01961439996957779
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,12,2,128,1,float16,fp8,0,0.02043360024690628
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,12,2,128,1,fp8,fp8,0,0.02032800018787384
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,12,4,128,1,float16,float16,0,0.01969279944896698
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,12,4,128,1,float16,fp8,0,0.020571200549602507
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,12,4,128,1,fp8,fp8,0,0.0205375999212265
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,12,1,128,1,float16,float16,0,0.040310400724411014
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,12,1,128,1,float16,fp8,0,0.04376960098743439
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,12,4,128,1,float16,float16,0,0.07106080055236816
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,12,1,128,1,fp8,fp8,0,0.04370239973068237
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,12,2,128,1,float16,float16,0,0.04636319875717163
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,12,2,128,1,float16,fp8,0,0.04834080040454865
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,12,2,128,1,fp8,fp8,0,0.04815039932727814
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,12,4,128,1,float16,fp8,0,0.07316640019416809
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,12,4,128,1,fp8,fp8,0,0.07388319969177246
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,12,12,128,1,float16,float16,0,0.08970720171928406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,12,12,128,1,float16,fp8,0,0.09599999785423279
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,12,12,128,1,fp8,fp8,0,0.0960864007472992
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,12,1,128,1,float16,float16,0,0.029468798637390138
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,12,1,128,1,float16,fp8,0,0.031086400151252747
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,12,1,128,1,fp8,fp8,0,0.030742400884628297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,12,2,128,1,float16,float16,0,0.030030399560928345
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,12,2,128,1,float16,fp8,0,0.03171679973602295
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,12,2,128,1,fp8,fp8,0,0.03177599906921387
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,12,4,128,1,float16,float16,0,0.04173760116100311
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,12,4,128,1,float16,fp8,0,0.04543200135231018
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,12,4,128,1,fp8,fp8,0,0.045105600357055665
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,12,12,128,1,float16,float16,0,0.05137119889259338
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,12,12,128,1,float16,fp8,0,0.05648639798164368
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,12,12,128,1,fp8,fp8,0,0.05625439882278442
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,12,1,128,1,float16,float16,0,0.02367040067911148
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,12,1,128,1,float16,fp8,0,0.02449759989976883
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,12,1,128,1,fp8,fp8,0,0.02452159970998764
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,12,2,128,1,float16,float16,0,0.02393600046634674
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,12,2,128,1,float16,fp8,0,0.02468159943819046
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,12,2,128,1,fp8,fp8,0,0.024745599925518037
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,12,4,128,1,float16,float16,0,0.028655999898910524
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,12,4,128,1,float16,fp8,0,0.030232000350952148
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,12,12,128,1,float16,float16,0,0.03371680080890656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,12,4,128,1,fp8,fp8,0,0.030353599786758424
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,12,12,128,1,float16,fp8,0,0.03598400056362152
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,12,12,128,1,fp8,fp8,0,0.03569760024547577
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,12,1,128,1,float16,float16,0,0.022622400522232057
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,12,1,128,1,float16,fp8,0,0.0236272007226944
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,12,1,128,1,fp8,fp8,0,0.023662400245666505
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,12,2,128,1,float16,float16,0,0.022569599747657775
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,12,2,128,1,float16,fp8,0,0.023852799832820893
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,12,2,128,1,fp8,fp8,0,0.02356960028409958
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,12,4,128,1,float16,float16,0,0.023131200671195985
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,12,4,128,1,float16,fp8,0,0.023815999925136565
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,12,4,128,1,fp8,fp8,0,0.0237184002995491
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,12,12,128,1,float16,float16,0,0.02569279968738556
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,12,12,128,1,float16,fp8,0,0.027140799164772033
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,12,2,128,1,float16,fp8,0,0.021873599290847777
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,12,12,128,1,fp8,fp8,0,0.027156800031661987
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,12,1,128,1,float16,float16,0,0.020982399582862854
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,12,1,128,1,float16,fp8,0,0.021990400552749634
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,12,1,128,1,fp8,fp8,0,0.02189279943704605
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,12,2,128,1,float16,float16,0,0.021044799685478212
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,12,2,128,1,fp8,fp8,0,0.02215680032968521
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,12,4,128,1,float16,float16,0,0.02107519954442978
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,12,4,128,1,float16,fp8,0,0.021803200244903564
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,12,4,128,1,fp8,fp8,0,0.02213599979877472
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,12,12,128,1,float16,float16,0,0.020662400126457214
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,12,12,128,1,float16,fp8,0,0.02174399942159653
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,12,12,128,1,fp8,fp8,0,0.021667200326919555
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,12,1,128,1,float16,float16,0,0.02017440050840378
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,12,1,128,1,float16,fp8,0,0.02136159986257553
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,12,1,128,1,fp8,fp8,0,0.02093600034713745
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,12,2,128,1,float16,float16,0,0.02025440037250519
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,12,2,128,1,float16,fp8,0,0.02075359970331192
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,12,2,128,1,fp8,fp8,0,0.020692799985408784
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,12,4,128,1,float16,float16,0,0.02056799978017807
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,12,4,128,1,float16,fp8,0,0.021411199867725373
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,12,4,128,1,fp8,fp8,0,0.02120479941368103
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,12,12,128,1,float16,float16,0,0.020552000403404234
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,12,12,128,1,float16,fp8,0,0.021190400421619415
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,12,12,128,1,fp8,fp8,0,0.02083680033683777
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,12,1,128,1,float16,float16,0,0.019406400620937347
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,12,1,128,1,float16,fp8,0,0.02014880031347275
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,12,1,128,1,fp8,fp8,0,0.020452800393104553
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,12,2,128,1,float16,float16,0,0.019555200636386872
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,12,2,128,1,float16,fp8,0,0.020446400344371795
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,12,2,128,1,fp8,fp8,0,0.020446400344371795
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,12,4,128,1,float16,float16,0,0.019702400267124175
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,12,4,128,1,float16,fp8,0,0.02027360051870346
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,12,4,128,1,fp8,fp8,0,0.02048799991607666
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,12,12,128,1,float16,float16,0,0.019889600574970245
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,12,12,128,1,float16,fp8,0,0.020791999995708466
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,12,12,128,1,fp8,fp8,0,0.021063999831676485
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,12,1,128,1,float16,float16,0,0.019636799395084382
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,12,1,128,1,float16,fp8,0,0.02051679939031601
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,12,1,128,1,fp8,fp8,0,0.020367999374866486
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,12,2,128,1,float16,float16,0,0.019433599710464478
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,12,2,128,1,float16,fp8,0,0.020103999972343446
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,12,2,128,1,fp8,fp8,0,0.01998399943113327
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,12,4,128,1,float16,float16,0,0.01958560049533844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,12,4,128,1,float16,fp8,0,0.02054399996995926
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,12,4,128,1,fp8,fp8,0,0.020345599949359895
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,12,12,128,1,float16,float16,0,0.019755199551582336
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,12,12,128,1,float16,fp8,0,0.020559999346733093
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,12,12,128,1,fp8,fp8,0,0.020263999700546265
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,12,1,128,1,float16,float16,0,0.019339199364185333
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,12,1,128,1,float16,fp8,0,0.02006080001592636
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,12,1,128,1,fp8,fp8,0,0.020238399505615234
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,12,2,128,1,float16,float16,0,0.019228799641132353
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,12,2,128,1,float16,fp8,0,0.02004159986972809
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,12,2,128,1,fp8,fp8,0,0.020265600085258482
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,12,4,128,1,float16,float16,0,0.019273599982261656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,12,4,128,1,float16,fp8,0,0.02003519982099533
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,12,4,128,1,fp8,fp8,0,0.02006080001592636
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,12,1,128,1,float16,float16,0,0.029492801427841185
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,12,1,128,1,float16,fp8,0,0.03145120143890381
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,12,1,128,1,fp8,fp8,0,0.03144159913063049
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,12,2,128,1,float16,float16,0,0.03728800117969513
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,12,2,128,1,float16,fp8,0,0.039900800585746764
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,12,2,128,1,fp8,fp8,0,0.04012959897518158
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,12,4,128,1,float16,float16,0,0.052027201652526854
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,12,4,128,1,float16,fp8,0,0.058134400844573976
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,12,4,128,1,fp8,fp8,0,0.058417600393295285
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,12,12,128,1,float16,float16,0,0.06374719738960266
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,12,12,128,1,float16,fp8,0,0.07370399832725524
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,12,12,128,1,fp8,fp8,0,0.07322880029678344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,12,1,128,1,float16,float16,0,0.02311680018901825
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,12,1,128,1,float16,fp8,0,0.023788799345493317
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,12,1,128,1,fp8,fp8,0,0.024099199473857878
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,12,2,128,1,float16,float16,0,0.026660799980163574
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,12,2,128,1,float16,fp8,0,0.028465598821640015
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,12,2,128,1,fp8,fp8,0,0.028545600175857545
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,12,4,128,1,float16,float16,0,0.034332799911499026
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,12,4,128,1,float16,fp8,0,0.037334400415420535
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,12,4,128,1,fp8,fp8,0,0.03745119869709015
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,12,12,128,1,float16,float16,0,0.04015679955482483
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,12,12,128,1,float16,fp8,0,0.04533439874649048
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,12,2,128,1,fp8,fp8,0,0.02228800058364868
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,12,12,128,1,fp8,fp8,0,0.044809600710868834
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,12,4,128,1,float16,float16,0,0.025224000215530396
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,12,1,128,1,float16,float16,0,0.02083359956741333
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,12,1,128,1,float16,fp8,0,0.021817600727081297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,12,1,128,1,fp8,fp8,0,0.02142080068588257
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,12,2,128,1,float16,float16,0,0.021121600270271303
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,12,2,128,1,float16,fp8,0,0.022363199293613432
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,12,4,128,1,float16,fp8,0,0.02686559855937958
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,12,1,128,1,fp8,fp8,0,0.02086080014705658
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,12,4,128,1,fp8,fp8,0,0.02690559923648834
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,12,12,128,1,float16,float16,0,0.028457599878311157
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,12,12,128,1,float16,fp8,0,0.030670401453971863
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,12,12,128,1,fp8,fp8,0,0.030422401428222657
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,12,1,128,1,float16,float16,0,0.01993599981069565
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,12,1,128,1,float16,fp8,0,0.020692799985408784
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,12,2,128,1,float16,float16,0,0.020446400344371795
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,12,2,128,1,float16,fp8,0,0.021267199516296388
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,12,2,128,1,fp8,fp8,0,0.02144159972667694
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,12,4,128,1,float16,float16,0,0.020633600652217865
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,12,4,128,1,float16,fp8,0,0.02168480008840561
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,12,4,128,1,fp8,fp8,0,0.021559999883174898
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,12,12,128,1,float16,float16,0,0.023601600527763368
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,12,12,128,1,float16,fp8,0,0.025217598676681517
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,12,12,128,1,fp8,fp8,0,0.02550239861011505
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,12,1,128,1,float16,float16,0,0.019356800615787505
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,12,1,128,1,float16,fp8,0,0.020291200280189513
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,12,1,128,1,fp8,fp8,0,0.020363199710845947
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,12,2,128,1,float16,float16,0,0.0196943998336792
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,12,2,128,1,float16,fp8,0,0.020367999374866486
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,12,2,128,1,fp8,fp8,0,0.020265600085258482
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,12,4,128,1,float16,float16,0,0.01969279944896698
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,12,4,128,1,float16,fp8,0,0.020759999752044678
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,12,4,128,1,fp8,fp8,0,0.020894399285316466
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,12,12,128,1,float16,float16,0,0.01993280053138733
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,12,12,128,1,float16,fp8,0,0.021158400177955627
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,12,12,128,1,fp8,fp8,0,0.020865599811077117
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,12,1,128,1,float16,float16,0,0.01926400065422058
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,12,1,128,1,float16,fp8,0,0.01979839950799942
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,12,1,128,1,fp8,fp8,0,0.01996160000562668
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,12,2,128,1,float16,float16,0,0.019121600687503813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,12,2,128,1,float16,fp8,0,0.02021760046482086
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,12,2,128,1,fp8,fp8,0,0.02030239999294281
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,12,4,128,1,float16,float16,0,0.019675199687480927
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,12,4,128,1,float16,fp8,0,0.020443199574947356
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,12,4,128,1,fp8,fp8,0,0.020241600275039674
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,12,12,128,1,float16,float16,0,0.01956319957971573
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,12,12,128,1,float16,fp8,0,0.020372800529003143
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,12,12,128,1,fp8,fp8,0,0.02057439982891083
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,12,1,128,1,float16,float16,0,0.019009600579738616
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,12,1,128,1,float16,fp8,0,0.01984640061855316
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,12,1,128,1,fp8,fp8,0,0.019972799718379973
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,12,2,128,1,float16,float16,0,0.01900479942560196
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,12,2,128,1,float16,fp8,0,0.019761599600315094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,12,2,128,1,fp8,fp8,0,0.019732800126075745
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,12,4,128,1,float16,float16,0,0.01910399943590164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,12,4,128,1,float16,fp8,0,0.020019200444221497
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,12,1,128,1,float16,fp8,0,0.017846399545669557
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,12,2,128,1,float16,float16,0,0.01902559995651245
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,12,4,128,1,fp8,fp8,0,0.019963200390338897
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,12,12,128,1,float16,float16,0,0.019515199959278105
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,12,12,128,1,float16,fp8,0,0.020448000729084016
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,12,12,128,1,fp8,fp8,0,0.02028000056743622
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,12,1,128,1,float16,float16,0,0.017262400686740877
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,12,1,128,1,fp8,fp8,0,0.01780479997396469
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,12,2,128,1,float16,fp8,0,0.019489599764347075
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,12,2,128,1,fp8,fp8,0,0.01992480009794235
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,12,4,128,1,float16,float16,0,0.019204799830913544
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,12,4,128,1,float16,fp8,0,0.019828799366950988
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,12,4,128,1,fp8,fp8,0,0.019968000054359437
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,12,12,128,1,float16,float16,0,0.01905920058488846
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,12,12,128,1,float16,fp8,0,0.019644799828529357
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,12,12,128,1,fp8,fp8,0,0.01956319957971573
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,12,1,128,1,float16,float16,0,0.016275200247764587
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,12,1,128,1,float16,fp8,0,0.01719679981470108
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,12,1,128,1,fp8,fp8,0,0.01730719953775406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,12,2,128,1,float16,float16,0,0.017056000232696534
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,12,2,128,1,float16,fp8,0,0.01786399930715561
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,12,2,128,1,fp8,fp8,0,0.017884799838066102
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,12,4,128,1,float16,float16,0,0.018825599551200868
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,12,4,128,1,float16,fp8,0,0.01942239999771118
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,12,4,128,1,fp8,fp8,0,0.019776000082492827
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,8,1,128,1,float16,fp8,0,6.434926605224609
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,8,1,128,1,fp8,fp8,0,6.416185760498047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,8,1,128,1,float16,float16,0,7.623513793945312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,8,2,128,1,float16,fp8,0,6.610564422607422
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,8,2,128,1,fp8,fp8,0,6.591728210449219
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,8,2,128,1,float16,float16,0,7.824329376220703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,8,4,128,1,float16,float16,0,7.891017913818359
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,8,8,128,1,float16,float16,0,4.219870376586914
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,8,1,128,1,float16,float16,0,3.730739212036133
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,8,8,128,1,fp8,fp8,0,3.772990417480469
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,8,8,128,1,float16,fp8,0,3.8100624084472656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,8,4,128,1,float16,fp8,0,6.867308807373047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,8,1,128,1,float16,fp8,0,3.3477855682373048
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,8,1,128,1,fp8,fp8,0,3.3115776062011717
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,8,4,128,1,fp8,fp8,0,6.871155548095703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,8,8,128,1,float16,float16,0,2.019268798828125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,8,2,128,1,float16,float16,0,3.8319950103759766
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,8,2,128,1,float16,fp8,0,3.391806411743164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,8,2,128,1,fp8,fp8,0,3.3185134887695313
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,8,8,128,1,float16,fp8,0,1.8961711883544923
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,8,4,128,1,float16,fp8,0,3.4553089141845703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,8,4,128,1,float16,float16,0,3.9176673889160156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,8,4,128,1,fp8,fp8,0,3.4536064147949217
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,8,8,128,1,fp8,fp8,0,1.982454490661621
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,8,1,128,1,float16,float16,0,1.7961280822753907
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,8,1,128,1,float16,fp8,0,1.6244176864624023
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,8,1,128,1,fp8,fp8,0,1.6394975662231446
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,8,2,128,1,float16,fp8,0,1.6667903900146483
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,8,2,128,1,float16,float16,0,1.7800432205200196
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,8,2,128,1,fp8,fp8,0,1.7619680404663085
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,8,4,128,1,float16,float16,0,1.891499137878418
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,8,8,128,1,float16,float16,0,1.0177424430847168
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,8,4,128,1,float16,fp8,0,1.7478015899658204
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,8,8,128,1,float16,fp8,0,0.9710320472717285
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,8,8,128,1,fp8,fp8,0,0.9665679931640625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,8,4,128,1,fp8,fp8,0,1.8276992797851563
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,8,1,128,1,float16,float16,0,0.8861840248107911
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,8,1,128,1,float16,fp8,0,0.8328304290771484
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,8,1,128,1,fp8,fp8,0,0.8819472312927246
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,8,2,128,1,float16,float16,0,0.9011648178100586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,8,2,128,1,float16,fp8,0,0.8509632110595703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,8,2,128,1,fp8,fp8,0,0.8585391998291015
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,8,4,128,1,float16,float16,0,0.9393456459045411
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,8,4,128,1,float16,fp8,0,0.8951616287231445
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,8,4,128,1,fp8,fp8,0,0.9076592445373535
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,8,1,128,1,float16,fp8,0,3.730606460571289
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,8,1,128,1,float16,float16,0,4.086824035644531
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,8,1,128,1,fp8,fp8,0,3.7204513549804688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,8,2,128,1,float16,fp8,0,3.811240005493164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,8,2,128,1,float16,float16,0,4.37739372253418
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,8,2,128,1,fp8,fp8,0,3.808676910400391
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,8,4,128,1,float16,fp8,0,4.035836791992187
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,8,4,128,1,float16,float16,0,4.665617752075195
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,8,8,128,1,float16,float16,0,2.515100860595703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,8,8,128,1,float16,fp8,0,2.2578256607055662
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,8,1,128,1,float16,float16,0,2.076136016845703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,8,1,128,1,float16,fp8,0,1.862548828125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,8,8,128,1,fp8,fp8,0,2.241611289978027
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,8,1,128,1,fp8,fp8,0,1.8604255676269532
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,8,4,128,1,fp8,fp8,0,4.03821907043457
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,8,2,128,1,float16,float16,0,2.231209564208984
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,8,2,128,1,float16,fp8,0,1.9722496032714845
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,8,2,128,1,fp8,fp8,0,1.9120191574096679
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,8,8,128,1,float16,float16,0,1.2451215744018556
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,8,8,128,1,float16,fp8,0,1.153774356842041
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,8,4,128,1,float16,fp8,0,2.042323112487793
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,8,8,128,1,fp8,fp8,0,1.2160127639770508
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,8,4,128,1,float16,float16,0,2.39257755279541
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,8,4,128,1,fp8,fp8,0,2.0216272354125975
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,8,1,128,1,float16,float16,0,1.0033408164978028
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,8,1,128,1,float16,fp8,0,0.9420576095581055
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,8,1,128,1,fp8,fp8,0,0.9480480194091797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,8,2,128,1,float16,float16,0,1.035916805267334
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,8,2,128,1,float16,fp8,0,0.9737664222717285
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,8,2,128,1,fp8,fp8,0,0.9882399559020996
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,8,4,128,1,float16,float16,0,1.1541664123535156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,8,4,128,1,float16,fp8,0,1.028700828552246
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,8,8,128,1,float16,float16,0,0.6609248161315918
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,8,4,128,1,fp8,fp8,0,1.0429120063781738
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,8,8,128,1,float16,fp8,0,0.6041135787963867
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,8,8,128,1,fp8,fp8,0,0.6182159900665283
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,8,1,128,1,float16,float16,0,0.5084144115447998
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,8,1,128,1,float16,fp8,0,0.48969759941101076
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,8,1,128,1,fp8,fp8,0,0.5009632110595703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,8,2,128,1,float16,float16,0,0.5265888214111328
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,8,2,128,1,float16,fp8,0,0.5024191856384277
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,8,2,128,1,fp8,fp8,0,0.5041952133178711
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,8,4,128,1,float16,float16,0,0.5648384094238281
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,8,4,128,1,float16,fp8,0,0.5313648223876953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,8,4,128,1,fp8,fp8,0,0.5322432041168212
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,8,1,128,1,float16,fp8,0,2.609601593017578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,8,1,128,1,float16,float16,0,2.8618383407592773
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,8,1,128,1,fp8,fp8,0,2.614687919616699
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,8,2,128,1,float16,float16,0,2.955776023864746
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,8,2,128,1,float16,fp8,0,2.6856687545776365
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,8,2,128,1,fp8,fp8,0,2.7054927825927733
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,8,4,128,1,float16,float16,0,3.282231903076172
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,8,8,128,1,float16,float16,0,1.7962848663330078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,8,8,128,1,float16,fp8,0,1.7520000457763671
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,8,1,128,1,float16,float16,0,1.4166831970214844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,8,8,128,1,fp8,fp8,0,1.650062370300293
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,8,4,128,1,float16,fp8,0,2.896988868713379
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,8,4,128,1,fp8,fp8,0,2.983683204650879
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,8,1,128,1,float16,fp8,0,1.510636806488037
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,8,1,128,1,fp8,fp8,0,1.42542724609375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,8,2,128,1,float16,float16,0,1.460697555541992
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,8,2,128,1,float16,fp8,0,1.4568304061889648
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,8,2,128,1,fp8,fp8,0,1.3630512237548829
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,8,4,128,1,float16,float16,0,1.5773023605346679
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,8,8,128,1,float16,float16,0,0.9299712181091309
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,8,4,128,1,float16,fp8,0,1.4803088188171387
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,8,8,128,1,float16,fp8,0,0.8879103660583496
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,8,4,128,1,fp8,fp8,0,1.4947567939758302
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,8,1,128,1,float16,float16,0,0.7189568042755127
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,8,1,128,1,float16,fp8,0,0.674505615234375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,8,8,128,1,fp8,fp8,0,0.8516464233398438
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,8,1,128,1,fp8,fp8,0,0.7395648002624512
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,8,2,128,1,float16,float16,0,0.738651180267334
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,8,2,128,1,float16,fp8,0,0.7080944061279297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,8,2,128,1,fp8,fp8,0,0.7057807922363282
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,8,4,128,1,float16,float16,0,0.8128911972045898
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,8,4,128,1,float16,fp8,0,0.7571616172790527
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,8,4,128,1,fp8,fp8,0,0.747273588180542
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,8,8,128,1,float16,float16,0,0.4941135883331299
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,8,8,128,1,float16,fp8,0,0.45382399559020997
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,8,8,128,1,fp8,fp8,0,0.4488976001739502
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,8,1,128,1,float16,float16,0,0.37324159145355223
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,8,1,128,1,float16,fp8,0,0.3503871917724609
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,8,1,128,1,fp8,fp8,0,0.3594511985778809
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,8,4,128,1,float16,fp8,0,0.3935647964477539
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,8,2,128,1,float16,float16,0,0.385100793838501
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,8,2,128,1,float16,fp8,0,0.36333279609680175
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,8,2,128,1,fp8,fp8,0,0.3651103973388672
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,8,4,128,1,float16,float16,0,0.42431039810180665
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,8,4,128,1,fp8,fp8,0,0.39469759464263915
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,8,1,128,1,float16,fp8,0,3.4138416290283202
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,8,1,128,1,float16,float16,0,3.783814239501953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,8,1,128,1,fp8,fp8,0,3.430246353149414
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,8,2,128,1,float16,fp8,0,3.5420272827148436
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,8,2,128,1,fp8,fp8,0,3.554880142211914
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,8,2,128,1,float16,float16,0,4.135158538818359
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,8,4,128,1,float16,fp8,0,3.882721710205078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,8,4,128,1,float16,float16,0,4.288603210449219
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,8,8,128,1,float16,float16,0,2.421401596069336
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,8,1,128,1,float16,float16,0,1.8549423217773438
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,8,8,128,1,float16,fp8,0,2.2375455856323243
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,8,1,128,1,float16,fp8,0,1.8033712387084961
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,8,8,128,1,fp8,fp8,0,2.224048042297363
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,8,1,128,1,fp8,fp8,0,1.716231918334961
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,8,4,128,1,fp8,fp8,0,3.8635807037353516
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,8,2,128,1,float16,float16,0,1.9966720581054687
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,8,2,128,1,float16,fp8,0,1.820083236694336
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,8,2,128,1,fp8,fp8,0,1.8758256912231446
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,8,8,128,1,float16,float16,0,1.1892144203186035
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,8,4,128,1,float16,float16,0,2.1097904205322267
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,8,8,128,1,float16,fp8,0,1.1377504348754883
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,8,4,128,1,float16,fp8,0,1.950404739379883
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,8,8,128,1,fp8,fp8,0,1.191748809814453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,8,4,128,1,fp8,fp8,0,1.939112091064453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,8,1,128,1,float16,float16,0,0.9258607864379883
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,8,1,128,1,float16,fp8,0,0.8806495666503906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,8,1,128,1,fp8,fp8,0,0.8737536430358886
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,8,2,128,1,float16,float16,0,1.0011136054992675
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,8,2,128,1,float16,fp8,0,0.9151151657104493
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,8,2,128,1,fp8,fp8,0,0.9136544227600097
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,8,4,128,1,float16,float16,0,1.0446512222290039
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,8,4,128,1,float16,fp8,0,0.9857312202453613
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,8,8,128,1,float16,float16,0,0.6058335781097413
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,8,1,128,1,float16,fp8,0,0.45435681343078616
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,8,4,128,1,fp8,fp8,0,0.9799983978271485
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,8,8,128,1,float16,fp8,0,0.5771952152252198
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,8,8,128,1,fp8,fp8,0,0.589027214050293
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,8,1,128,1,float16,float16,0,0.4716159820556641
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,8,1,128,1,fp8,fp8,0,0.45346717834472655
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,8,2,128,1,float16,float16,0,0.4847263813018799
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,8,2,128,1,float16,fp8,0,0.4713712215423584
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,8,2,128,1,fp8,fp8,0,0.47208638191223146
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,8,4,128,1,float16,float16,0,0.5307968139648438
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,8,4,128,1,float16,fp8,0,0.5042975902557373
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,8,8,128,1,float16,float16,0,0.3145056009292603
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,8,4,128,1,fp8,fp8,0,0.5059631824493408
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,8,8,128,1,float16,fp8,0,0.3023936033248901
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,8,8,128,1,fp8,fp8,0,0.30191199779510497
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,8,1,128,1,float16,float16,0,0.24657280445098878
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,8,1,128,1,float16,fp8,0,0.22979040145874025
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,8,1,128,1,fp8,fp8,0,0.22946560382843018
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,8,4,128,1,fp8,fp8,0,0.2625247955322266
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,8,2,128,1,float16,float16,0,0.2558367967605591
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,8,2,128,1,float16,fp8,0,0.23810238838195802
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,8,2,128,1,fp8,fp8,0,0.23848481178283693
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,8,4,128,1,float16,float16,0,0.2772687911987305
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,8,4,128,1,float16,fp8,0,0.2600111961364746
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,8,1,128,1,float16,fp8,0,2.0224224090576173
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,8,1,128,1,float16,float16,0,2.1890752792358397
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,8,1,128,1,fp8,fp8,0,2.0068559646606445
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,8,2,128,1,float16,float16,0,2.2519760131835938
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,8,2,128,1,float16,fp8,0,2.118280029296875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,8,2,128,1,fp8,fp8,0,2.1323312759399413
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,8,4,128,1,float16,float16,0,2.4803504943847656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,8,8,128,1,float16,fp8,0,1.4611104011535645
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,8,8,128,1,float16,float16,0,1.4902560234069824
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,8,1,128,1,float16,float16,0,1.0760368347167968
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,8,4,128,1,float16,fp8,0,2.3442575454711916
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,8,8,128,1,fp8,fp8,0,1.408390426635742
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,8,4,128,1,fp8,fp8,0,2.3974720001220704
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,8,1,128,1,float16,fp8,0,1.1185680389404298
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,8,1,128,1,fp8,fp8,0,1.020854377746582
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,8,2,128,1,float16,float16,0,1.1298383712768554
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,8,2,128,1,float16,fp8,0,1.0951583862304688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,8,2,128,1,fp8,fp8,0,1.0681471824645996
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,8,4,128,1,float16,float16,0,1.255025577545166
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,8,4,128,1,float16,fp8,0,1.1808560371398926
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,8,8,128,1,float16,float16,0,0.7722991943359375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,8,8,128,1,float16,fp8,0,0.7244592189788819
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,8,4,128,1,fp8,fp8,0,1.214575958251953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,8,8,128,1,fp8,fp8,0,0.7252272129058838
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,8,1,128,1,float16,float16,0,0.5456511974334717
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,8,1,128,1,float16,fp8,0,0.5422416210174561
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,8,1,128,1,fp8,fp8,0,0.5223087787628173
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,8,2,128,1,float16,float16,0,0.5759583950042725
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,8,2,128,1,float16,fp8,0,0.5524320125579834
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,8,2,128,1,fp8,fp8,0,0.5571951866149902
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,8,4,128,1,float16,float16,0,0.6306399822235107
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,8,4,128,1,float16,fp8,0,0.6136320114135743
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,8,8,128,1,float16,float16,0,0.4080639839172363
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,8,4,128,1,fp8,fp8,0,0.6016848087310791
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,8,8,128,1,float16,fp8,0,0.3732383966445923
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,8,8,128,1,fp8,fp8,0,0.375766396522522
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,8,1,128,1,float16,float16,0,0.2842080116271973
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,8,1,128,1,float16,fp8,0,0.27292320728302
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,8,1,128,1,fp8,fp8,0,0.272873592376709
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,8,2,128,1,float16,float16,0,0.30232479572296145
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,8,2,128,1,float16,fp8,0,0.28929600715637205
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,8,2,128,1,fp8,fp8,0,0.2865648031234741
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,8,4,128,1,float16,float16,0,0.3322432041168213
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,8,4,128,1,float16,fp8,0,0.3167088031768799
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,8,4,128,1,fp8,fp8,0,0.3160288095474243
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,8,8,128,1,float16,float16,0,0.22591838836669922
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,8,8,128,1,float16,fp8,0,0.20016000270843506
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,8,8,128,1,fp8,fp8,0,0.20253920555114746
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,8,1,128,1,float16,float16,0,0.15650080442428588
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,8,1,128,1,float16,fp8,0,0.14712320566177367
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,8,1,128,1,fp8,fp8,0,0.14656800031661987
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,8,2,128,1,float16,float16,0,0.1692255973815918
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,8,2,128,1,float16,fp8,0,0.1560271978378296
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,8,2,128,1,fp8,fp8,0,0.1563599944114685
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,8,4,128,1,float16,float16,0,0.18298879861831666
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,8,4,128,1,float16,fp8,0,0.17459839582443237
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,8,4,128,1,fp8,fp8,0,0.16799360513687134
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,8,1,128,1,float16,float16,0,2.0526544570922853
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,8,1,128,1,float16,fp8,0,1.906051254272461
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,8,1,128,1,fp8,fp8,0,1.924545669555664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,8,2,128,1,float16,float16,0,2.184596824645996
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,8,2,128,1,float16,fp8,0,2.0655887603759764
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,8,2,128,1,fp8,fp8,0,2.0718303680419923
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,8,4,128,1,float16,float16,0,2.5087663650512697
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,8,4,128,1,float16,fp8,0,2.3730960845947267
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,8,8,128,1,float16,float16,0,1.559563159942627
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,8,8,128,1,float16,fp8,0,1.486406421661377
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,8,1,128,1,float16,float16,0,1.0152624130249024
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,8,1,128,1,float16,fp8,0,0.97249755859375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,8,8,128,1,fp8,fp8,0,1.5095343589782715
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,8,4,128,1,fp8,fp8,0,2.480273628234863
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,8,1,128,1,fp8,fp8,0,1.0432623863220214
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,8,2,128,1,float16,float16,0,1.0988911628723144
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,8,2,128,1,float16,fp8,0,1.0466927528381347
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,8,2,128,1,fp8,fp8,0,1.0410016059875489
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,8,4,128,1,float16,float16,0,1.2474672317504882
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,8,4,128,1,float16,fp8,0,1.1959407806396485
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,8,8,128,1,float16,float16,0,0.7827151775360107
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,8,4,128,1,fp8,fp8,0,1.2013232231140136
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,8,8,128,1,float16,fp8,0,0.7638832092285156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,8,1,128,1,fp8,fp8,0,0.4977680206298828
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,8,1,128,1,float16,float16,0,0.5246960163116455
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,8,8,128,1,fp8,fp8,0,0.7587808132171631
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,8,1,128,1,float16,fp8,0,0.5002255916595459
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,8,2,128,1,float16,float16,0,0.5549503803253174
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,8,2,128,1,float16,fp8,0,0.5402336120605469
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,8,2,128,1,fp8,fp8,0,0.5388271808624268
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,8,4,128,1,float16,float16,0,0.6380176067352294
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,8,1,128,1,float16,float16,0,0.2738336086273193
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,8,4,128,1,float16,fp8,0,0.6084191799163818
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,8,8,128,1,float16,float16,0,0.40351362228393556
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,8,4,128,1,fp8,fp8,0,0.6119040012359619
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,8,8,128,1,float16,fp8,0,0.3908544063568115
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,8,8,128,1,fp8,fp8,0,0.38856799602508546
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,8,1,128,1,float16,fp8,0,0.2654207944869995
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,8,1,128,1,fp8,fp8,0,0.26751840114593506
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,8,2,128,1,float16,float16,0,0.2926448106765747
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,8,2,128,1,float16,fp8,0,0.28079040050506593
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,8,2,128,1,fp8,fp8,0,0.2821487903594971
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,8,4,128,1,float16,float16,0,0.32957921028137205
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,8,4,128,1,float16,fp8,0,0.31602718830108645
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,8,4,128,1,fp8,fp8,0,0.3188352108001709
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,8,8,128,1,float16,float16,0,0.2149616003036499
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,8,8,128,1,float16,fp8,0,0.2072144031524658
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,8,8,128,1,fp8,fp8,0,0.20640161037445068
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,8,1,128,1,float16,float16,0,0.14514399766921998
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,8,1,128,1,float16,fp8,0,0.13565280437469482
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,8,1,128,1,fp8,fp8,0,0.13508479595184325
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,8,2,128,1,float16,float16,0,0.15765600204467772
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,8,2,128,1,float16,fp8,0,0.1446943998336792
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,8,2,128,1,fp8,fp8,0,0.14436479806900024
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,8,4,128,1,float16,float16,0,0.17622400522232057
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,8,4,128,1,float16,fp8,0,0.165011203289032
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,8,4,128,1,fp8,fp8,0,0.16666719913482667
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,8,8,128,1,float16,float16,0,0.11444159746170043
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,8,8,128,1,float16,fp8,0,0.10573279857635498
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,8,8,128,1,fp8,fp8,0,0.10594719648361206
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,8,1,128,1,float16,float16,0,0.07789599895477295
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,8,1,128,1,float16,fp8,0,0.0752784013748169
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,8,1,128,1,fp8,fp8,0,0.07537440061569214
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,8,2,128,1,float16,float16,0,0.08214719891548157
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,8,2,128,1,float16,fp8,0,0.08054720163345337
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,8,2,128,1,fp8,fp8,0,0.07919039726257324
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,8,4,128,1,float16,float16,0,0.09243999719619751
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,8,4,128,1,float16,fp8,0,0.08816159963607788
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,8,4,128,1,fp8,fp8,0,0.08799359798431397
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,8,1,128,1,float16,float16,0,1.2109472274780273
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,8,1,128,1,float16,fp8,0,1.1596223831176757
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,8,1,128,1,fp8,fp8,0,1.1713184356689452
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,8,2,128,1,float16,float16,0,1.3343536376953125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,8,2,128,1,float16,fp8,0,1.2779184341430665
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,8,2,128,1,fp8,fp8,0,1.2764016151428224
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,8,4,128,1,float16,float16,0,1.5485072135925293
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,8,8,128,1,float16,float16,0,1.0169391632080078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,8,4,128,1,float16,fp8,0,1.5021295547485352
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,8,1,128,1,float16,float16,0,0.6180047988891602
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,8,4,128,1,fp8,fp8,0,1.49344482421875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,8,8,128,1,float16,fp8,0,1.0003487586975097
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,8,1,128,1,float16,fp8,0,0.5984511852264405
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,8,8,128,1,fp8,fp8,0,0.9808128356933594
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,8,1,128,1,fp8,fp8,0,0.6082367897033691
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,8,2,128,1,float16,float16,0,0.6698143959045411
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,8,2,128,1,float16,fp8,0,0.6489391803741456
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,8,2,128,1,fp8,fp8,0,0.6555376052856445
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,8,4,128,1,float16,float16,0,0.7849023818969727
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,8,4,128,1,float16,fp8,0,0.7655695915222168
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,8,4,128,1,fp8,fp8,0,0.7638864040374755
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,8,8,128,1,float16,float16,0,0.528272008895874
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,8,8,128,1,float16,fp8,0,0.5069600105285644
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,8,8,128,1,fp8,fp8,0,0.5015376091003418
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,8,1,128,1,float16,float16,0,0.3194128036499023
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,8,1,128,1,float16,fp8,0,0.3158911943435669
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,8,1,128,1,fp8,fp8,0,0.31166720390319824
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,8,2,128,1,float16,float16,0,0.35011680126190187
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,8,2,128,1,float16,fp8,0,0.33799519538879397
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,8,2,128,1,fp8,fp8,0,0.34020318984985354
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,8,4,128,1,float16,float16,0,0.4085440158843994
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,8,4,128,1,float16,fp8,0,0.3935120105743408
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,8,4,128,1,fp8,fp8,0,0.39355199337005614
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,8,8,128,1,float16,float16,0,0.2809344053268433
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,8,8,128,1,float16,fp8,0,0.26208319664001467
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,8,8,128,1,fp8,fp8,0,0.2649951934814453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,8,1,128,1,float16,float16,0,0.17208319902420044
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,8,1,128,1,float16,fp8,0,0.1663424015045166
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,8,1,128,1,fp8,fp8,0,0.16879839897155763
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,8,2,128,1,float16,float16,0,0.18583519458770753
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,8,2,128,1,float16,fp8,0,0.18190879821777345
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,8,2,128,1,fp8,fp8,0,0.18411999940872192
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,8,4,128,1,float16,float16,0,0.21428320407867432
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,8,4,128,1,float16,fp8,0,0.2096735954284668
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,8,4,128,1,fp8,fp8,0,0.20983519554138183
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,8,8,128,1,float16,float16,0,0.15325759649276732
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,8,8,128,1,float16,fp8,0,0.14270080327987672
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,8,8,128,1,fp8,fp8,0,0.14208799600601196
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,8,1,128,1,float16,float16,0,0.09368799924850464
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,8,1,128,1,float16,fp8,0,0.09170399904251099
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,8,1,128,1,fp8,fp8,0,0.09020159840583801
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,8,2,128,1,float16,float16,0,0.10388799905776977
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,8,2,128,1,float16,fp8,0,0.09731839895248413
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,8,2,128,1,fp8,fp8,0,0.09710720181465149
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,8,4,128,1,float16,float16,0,0.12155040502548217
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,8,4,128,1,float16,fp8,0,0.10937440395355225
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,8,4,128,1,fp8,fp8,0,0.10927679538726806
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,8,8,128,1,float16,float16,0,0.08579199910163879
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,8,8,128,1,float16,fp8,0,0.07957599759101867
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,8,8,128,1,fp8,fp8,0,0.08084319829940796
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,8,1,128,1,float16,float16,0,0.06051040291786194
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,8,1,128,1,float16,fp8,0,0.060115200281143186
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,8,1,128,1,fp8,fp8,0,0.060201597213745114
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,8,2,128,1,float16,float16,0,0.06277599930763245
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,8,2,128,1,float16,fp8,0,0.06227840185165405
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,8,2,128,1,fp8,fp8,0,0.0629360020160675
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,8,4,128,1,float16,float16,0,0.06889119744300842
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,8,4,128,1,float16,fp8,0,0.06851360201835632
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,8,4,128,1,fp8,fp8,0,0.06867200136184692
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,8,1,128,1,float16,float16,0,1.2077808380126953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,8,1,128,1,float16,fp8,0,1.1775744438171387
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,8,1,128,1,fp8,fp8,0,1.1714384078979492
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,8,2,128,1,float16,float16,0,1.357147216796875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,8,2,128,1,float16,fp8,0,1.3208959579467774
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,8,2,128,1,fp8,fp8,0,1.313372802734375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,8,8,128,1,float16,float16,0,1.136190414428711
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,8,4,128,1,float16,float16,0,1.6494783401489257
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,8,4,128,1,float16,fp8,0,1.6184383392333985
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,8,4,128,1,fp8,fp8,0,1.6208240509033203
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,8,8,128,1,float16,fp8,0,1.118336009979248
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,8,1,128,1,float16,float16,0,0.6164239883422852
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,8,8,128,1,fp8,fp8,0,1.1153488159179688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,8,1,128,1,float16,fp8,0,0.6000207901000977
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,8,1,128,1,fp8,fp8,0,0.6016831874847413
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,8,2,128,1,float16,float16,0,0.6909471988677979
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,8,2,128,1,float16,fp8,0,0.6721903800964355
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,8,2,128,1,fp8,fp8,0,0.6736159801483155
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,8,4,128,1,float16,float16,0,0.8459008216857911
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,8,4,128,1,fp8,fp8,0,0.8210448265075684
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,8,8,128,1,float16,float16,0,0.5820367813110352
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,8,4,128,1,float16,fp8,0,0.8240400314331054
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,8,8,128,1,float16,fp8,0,0.5690800189971924
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,8,1,128,1,float16,float16,0,0.3220319986343384
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,8,8,128,1,fp8,fp8,0,0.5706096172332764
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,8,1,128,1,float16,fp8,0,0.3133968114852905
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,8,1,128,1,fp8,fp8,0,0.3138767957687378
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,8,2,128,1,float16,float16,0,0.35730719566345215
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,8,2,128,1,float16,fp8,0,0.3508575916290283
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,8,2,128,1,fp8,fp8,0,0.34939839839935305
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,8,4,128,1,float16,fp8,0,0.42085280418396
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,8,4,128,1,float16,float16,0,0.43521919250488283
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,8,4,128,1,fp8,fp8,0,0.42412481307983396
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,8,8,128,1,float16,float16,0,0.3052063941955566
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,8,8,128,1,float16,fp8,0,0.2974303960800171
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,8,8,128,1,fp8,fp8,0,0.2963376045227051
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,8,1,128,1,float16,float16,0,0.17386080026626588
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,8,1,128,1,float16,fp8,0,0.17004480361938476
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,8,1,128,1,fp8,fp8,0,0.1695024013519287
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,8,2,128,1,float16,float16,0,0.1929103970527649
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,8,2,128,1,float16,fp8,0,0.1889888048171997
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,8,2,128,1,fp8,fp8,0,0.18710240125656127
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,8,4,128,1,float16,float16,0,0.2299504041671753
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,8,4,128,1,float16,fp8,0,0.22384960651397706
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,8,4,128,1,fp8,fp8,0,0.22364480495452882
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,8,8,128,1,float16,float16,0,0.1632480025291443
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,8,8,128,1,float16,fp8,0,0.1588047981262207
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,8,8,128,1,fp8,fp8,0,0.15996320247650148
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,8,1,128,1,float16,float16,0,0.09591839909553528
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,8,1,128,1,float16,fp8,0,0.08956639766693116
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,8,1,128,1,fp8,fp8,0,0.09082080125808716
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,8,2,128,1,float16,float16,0,0.1067952036857605
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,8,2,128,1,float16,fp8,0,0.09791200160980225
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,8,2,128,1,fp8,fp8,0,0.09781439900398255
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,8,4,128,1,float16,float16,0,0.12473920583724976
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,8,4,128,1,float16,fp8,0,0.11887520551681519
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,8,4,128,1,fp8,fp8,0,0.11889439821243286
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,8,8,128,1,float16,float16,0,0.09026240110397339
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,8,8,128,1,float16,fp8,0,0.08297600150108338
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,8,8,128,1,fp8,fp8,0,0.0825872004032135
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,8,1,128,1,float16,float16,0,0.052527999877929686
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,8,1,128,1,float16,fp8,0,0.052688002586364746
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,8,1,128,1,fp8,fp8,0,0.05270879864692688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,8,2,128,1,float16,float16,0,0.05753440260887146
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,8,2,128,1,float16,fp8,0,0.05732319951057434
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,8,2,128,1,fp8,fp8,0,0.056904000043869016
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,8,4,128,1,float16,float16,0,0.06695200204849243
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,8,4,128,1,float16,fp8,0,0.06433759927749634
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,8,4,128,1,fp8,fp8,0,0.06464959979057312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,8,8,128,1,float16,float16,0,0.059356802701950075
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,8,8,128,1,float16,fp8,0,0.058487999439239505
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,8,8,128,1,fp8,fp8,0,0.058905601501464844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,8,1,128,1,float16,float16,0,0.046795201301574704
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,8,1,128,1,float16,fp8,0,0.046649599075317384
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,8,1,128,1,fp8,fp8,0,0.048204800486564635
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,8,2,128,1,float16,float16,0,0.0475951999425888
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,8,2,128,1,float16,fp8,0,0.04728800058364868
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,8,2,128,1,fp8,fp8,0,0.047460800409317015
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,8,4,128,1,float16,float16,0,0.05152959823608398
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,8,4,128,1,float16,fp8,0,0.051601600646972653
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,8,1,128,1,float16,float16,0,0.8389552116394043
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,8,4,128,1,fp8,fp8,0,0.05161920189857483
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,8,1,128,1,fp8,fp8,0,0.8246623992919921
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,8,1,128,1,float16,fp8,0,0.8205103874206543
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,8,2,128,1,float16,float16,0,0.9533151626586914
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,8,2,128,1,float16,fp8,0,0.9286928176879883
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,8,2,128,1,fp8,fp8,0,0.9259119987487793
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,8,4,128,1,float16,float16,0,1.1770992279052734
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,8,8,128,1,float16,float16,0,0.8279199600219727
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,8,4,128,1,float16,fp8,0,1.156611156463623
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,8,1,128,1,float16,float16,0,0.43489599227905273
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,8,4,128,1,fp8,fp8,0,1.1619759559631349
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,8,8,128,1,float16,fp8,0,0.813265609741211
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,8,1,128,1,float16,fp8,0,0.42471837997436523
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,8,8,128,1,fp8,fp8,0,0.8128543853759765
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,8,1,128,1,fp8,fp8,0,0.4272463798522949
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,8,2,128,1,float16,float16,0,0.49028000831604
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,8,2,128,1,float16,fp8,0,0.47775840759277344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,8,2,128,1,fp8,fp8,0,0.4791855812072754
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,8,4,128,1,float16,float16,0,0.6048031806945801
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,8,4,128,1,float16,fp8,0,0.5944352149963379
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,8,4,128,1,fp8,fp8,0,0.5928768157958985
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,8,8,128,1,float16,float16,0,0.4280496120452881
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,8,8,128,1,float16,fp8,0,0.4194176197052002
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,8,8,128,1,fp8,fp8,0,0.4202752113342285
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,8,1,128,1,float16,float16,0,0.2318959951400757
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,8,1,128,1,float16,fp8,0,0.22758560180664061
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,8,1,128,1,fp8,fp8,0,0.22790238857269288
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,8,2,128,1,float16,float16,0,0.25803360939025877
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,8,2,128,1,float16,fp8,0,0.2557248115539551
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,8,2,128,1,fp8,fp8,0,0.2548223972320557
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,8,4,128,1,float16,float16,0,0.3159679889678955
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,8,4,128,1,float16,fp8,0,0.3108720064163208
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,8,4,128,1,fp8,fp8,0,0.31019999980926516
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,8,8,128,1,float16,float16,0,0.22654080390930176
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,8,8,128,1,float16,fp8,0,0.22198240756988524
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,8,8,128,1,fp8,fp8,0,0.22339038848876952
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,8,1,128,1,float16,float16,0,0.12791199684143068
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,8,1,128,1,float16,fp8,0,0.12705119848251342
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,8,1,128,1,fp8,fp8,0,0.12618399858474733
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,8,2,128,1,float16,float16,0,0.14205440282821655
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,8,2,128,1,float16,fp8,0,0.14035520553588868
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,8,2,128,1,fp8,fp8,0,0.1400256037712097
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,8,4,128,1,float16,float16,0,0.17052960395812988
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,8,4,128,1,float16,fp8,0,0.1684224009513855
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,8,4,128,1,fp8,fp8,0,0.16853920221328736
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,8,8,128,1,float16,float16,0,0.12580000162124633
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,8,8,128,1,float16,fp8,0,0.12127519845962524
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,8,8,128,1,fp8,fp8,0,0.1213647961616516
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,8,1,128,1,float16,float16,0,0.07061439752578735
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,8,1,128,1,float16,fp8,0,0.07028800249099731
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,8,1,128,1,fp8,fp8,0,0.07006239891052246
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,8,2,128,1,float16,float16,0,0.07977759838104248
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,8,2,128,1,float16,fp8,0,0.07694879770278931
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,8,2,128,1,fp8,fp8,0,0.07668319940567017
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,8,4,128,1,float16,float16,0,0.09632800221443176
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,8,4,128,1,float16,fp8,0,0.08895519971847535
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,8,4,128,1,fp8,fp8,0,0.09236479997634887
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,8,8,128,1,float16,float16,0,0.06881920099258423
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,8,8,128,1,float16,fp8,0,0.06530399918556214
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,8,8,128,1,fp8,fp8,0,0.06525120139122009
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,8,1,128,1,float16,float16,0,0.04488160014152527
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,8,1,128,1,float16,fp8,0,0.045824000239372255
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,8,1,128,1,fp8,fp8,0,0.04593760073184967
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,8,2,128,1,float16,float16,0,0.047137600183486936
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,8,2,128,1,float16,fp8,0,0.04812479913234711
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,8,2,128,1,fp8,fp8,0,0.04840799868106842
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,8,4,128,1,float16,float16,0,0.0542464017868042
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,8,4,128,1,float16,fp8,0,0.05463520288467407
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,8,4,128,1,fp8,fp8,0,0.054655998945236206
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,8,8,128,1,float16,float16,0,0.05869920253753662
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,8,8,128,1,float16,fp8,0,0.05916000008583069
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,8,8,128,1,fp8,fp8,0,0.05884640216827393
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,8,1,128,1,float16,float16,0,0.04423199892044068
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,8,1,128,1,float16,fp8,0,0.0448496013879776
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,8,1,128,1,fp8,fp8,0,0.044896000623703004
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,8,2,128,1,float16,float16,0,0.047620800137519834
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,8,2,128,1,float16,fp8,0,0.04853439927101135
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,8,2,128,1,fp8,fp8,0,0.04849120080471039
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,8,4,128,1,float16,float16,0,0.051652801036834714
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,8,4,128,1,float16,fp8,0,0.05250080227851868
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,8,4,128,1,fp8,fp8,0,0.05269439816474915
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,8,1,128,1,float16,float16,0,0.9146016120910645
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,8,1,128,1,float16,fp8,0,0.9051759719848633
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,8,1,128,1,fp8,fp8,0,0.9056032180786133
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,8,2,128,1,float16,float16,0,1.06387357711792
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,8,2,128,1,float16,fp8,0,1.0506591796875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,8,2,128,1,fp8,fp8,0,1.0481120109558106
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,8,4,128,1,float16,float16,0,1.3572015762329102
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,8,4,128,1,float16,fp8,0,1.3488592147827148
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,8,8,128,1,float16,float16,0,0.9934944152832031
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,8,4,128,1,fp8,fp8,0,1.3546640396118164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,8,1,128,1,float16,float16,0,0.476423978805542
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,8,8,128,1,float16,fp8,0,0.9854991912841797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,8,8,128,1,fp8,fp8,0,0.9848192214965821
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,8,1,128,1,float16,fp8,0,0.46730880737304686
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,8,1,128,1,fp8,fp8,0,0.46655840873718263
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,8,2,128,1,float16,float16,0,0.544707202911377
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,8,2,128,1,float16,fp8,0,0.5397583961486816
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,8,2,128,1,fp8,fp8,0,0.5399680137634277
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,8,4,128,1,float16,float16,0,0.7004335880279541
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,8,4,128,1,float16,fp8,0,0.6912479877471924
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,8,4,128,1,fp8,fp8,0,0.690667200088501
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,8,8,128,1,float16,float16,0,0.5133471965789795
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,8,8,128,1,float16,fp8,0,0.5057568073272705
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,8,8,128,1,fp8,fp8,0,0.5056000232696534
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,8,1,128,1,float16,float16,0,0.25316319465637205
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,8,1,128,1,float16,fp8,0,0.25137441158294677
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,8,1,128,1,fp8,fp8,0,0.2503711938858032
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,8,2,128,1,float16,float16,0,0.2888175964355469
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,8,2,128,1,float16,fp8,0,0.2845360040664673
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,8,2,128,1,fp8,fp8,0,0.2847872018814087
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,8,4,128,1,float16,float16,0,0.3618351936340332
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,8,4,128,1,float16,fp8,0,0.3591311931610107
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,8,4,128,1,fp8,fp8,0,0.3604752063751221
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,8,8,128,1,float16,float16,0,0.27128479480743406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,8,8,128,1,float16,fp8,0,0.26735680103302
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,8,8,128,1,fp8,fp8,0,0.2679856061935425
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,8,1,128,1,float16,float16,0,0.14031519889831542
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,8,1,128,1,float16,fp8,0,0.13967519998550415
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,8,1,128,1,fp8,fp8,0,0.13981120586395263
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,8,2,128,1,float16,float16,0,0.15847359895706176
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,8,2,128,1,float16,fp8,0,0.15846879482269288
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,8,2,128,1,fp8,fp8,0,0.15829440355300903
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,8,4,128,1,float16,float16,0,0.1964959979057312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,8,4,128,1,float16,fp8,0,0.1953920006752014
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,8,4,128,1,fp8,fp8,0,0.1952831983566284
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,8,8,128,1,float16,float16,0,0.1484976053237915
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,8,8,128,1,float16,fp8,0,0.14842720031738282
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,8,8,128,1,fp8,fp8,0,0.14833120107650757
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,8,1,128,1,float16,float16,0,0.08171679973602294
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,8,1,128,1,float16,fp8,0,0.0822704017162323
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,8,1,128,1,fp8,fp8,0,0.08044000267982483
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,8,2,128,1,float16,float16,0,0.0924448013305664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,8,2,128,1,float16,fp8,0,0.08627200126647949
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,8,2,128,1,fp8,fp8,0,0.08891519904136658
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,8,4,128,1,float16,float16,0,0.11188960075378418
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,8,4,128,1,float16,fp8,0,0.109716796875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,8,4,128,1,fp8,fp8,0,0.10965119600296021
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,8,8,128,1,float16,float16,0,0.08797119855880738
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,8,8,128,1,float16,fp8,0,0.07963680028915406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,8,8,128,1,fp8,fp8,0,0.07986400127410889
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,8,1,128,1,float16,float16,0,0.05036479830741882
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,8,1,128,1,float16,fp8,0,0.05128960013389587
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,8,1,128,1,fp8,fp8,0,0.051419198513031006
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,8,4,128,1,float16,fp8,0,0.06317920088768006
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,8,2,128,1,float16,float16,0,0.055537599325180056
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,8,2,128,1,float16,fp8,0,0.05574880242347717
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,8,2,128,1,fp8,fp8,0,0.05587040185928345
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,8,4,128,1,float16,float16,0,0.06351839900016784
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,8,1,128,1,float16,float16,0,0.037108799815177916
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,8,4,128,1,fp8,fp8,0,0.06292799711227418
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,8,8,128,1,float16,float16,0,0.04992640018463135
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,8,8,128,1,float16,fp8,0,0.05106880068778992
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,8,8,128,1,fp8,fp8,0,0.051481598615646364
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,8,1,128,1,float16,fp8,0,0.038550400733947755
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,8,1,128,1,fp8,fp8,0,0.038289600610733034
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,8,2,128,1,float16,fp8,0,0.039480000734329224
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,8,2,128,1,float16,float16,0,0.038099199533462524
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,8,2,128,1,fp8,fp8,0,0.03916319906711578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,8,4,128,1,float16,float16,0,0.04252800047397613
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,8,4,128,1,float16,fp8,0,0.043812799453735354
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,8,4,128,1,fp8,fp8,0,0.04408639967441559
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,8,8,128,1,float16,float16,0,0.04115520119667053
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,8,8,128,1,float16,fp8,0,0.04261119961738587
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,8,8,128,1,fp8,fp8,0,0.04312160015106201
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,8,1,128,1,float16,float16,0,0.03528479933738708
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,8,1,128,1,float16,fp8,0,0.0368800014257431
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,8,1,128,1,fp8,fp8,0,0.037350401282310486
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,8,2,128,1,float16,float16,0,0.036048001050949095
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,8,2,128,1,float16,fp8,0,0.03758560121059418
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,8,2,128,1,fp8,fp8,0,0.03749600052833557
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,8,4,128,1,float16,float16,0,0.03644640147686005
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,8,4,128,1,float16,fp8,0,0.038577601313591
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,8,4,128,1,fp8,fp8,0,0.03844960033893585
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,8,1,128,1,float16,float16,0,0.6222799777984619
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,8,1,128,1,float16,fp8,0,0.6297664165496826
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,8,1,128,1,fp8,fp8,0,0.6294352054595947
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,8,2,128,1,float16,float16,0,0.7689119815826416
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,8,2,128,1,float16,fp8,0,0.7698351860046386
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,8,2,128,1,fp8,fp8,0,0.7680560111999511
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,8,4,128,1,float16,float16,0,1.0694704055786133
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,8,4,128,1,float16,fp8,0,1.076249599456787
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,8,8,128,1,float16,float16,0,0.8462224006652832
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,8,4,128,1,fp8,fp8,0,1.0731776237487793
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,8,1,128,1,float16,float16,0,0.32389121055603026
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,8,8,128,1,float16,fp8,0,0.8476880073547364
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,8,8,128,1,fp8,fp8,0,0.8475088119506836
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,8,1,128,1,float16,fp8,0,0.3284287929534912
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,8,1,128,1,fp8,fp8,0,0.326694393157959
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,8,2,128,1,float16,float16,0,0.39812641143798827
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,8,2,128,1,float16,fp8,0,0.39804959297180176
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,8,2,128,1,fp8,fp8,0,0.39649760723114014
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,8,4,128,1,float16,float16,0,0.5476431846618652
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,8,1,128,1,float16,float16,0,0.17539360523223876
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,8,4,128,1,float16,fp8,0,0.5495759963989257
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,8,4,128,1,fp8,fp8,0,0.549948787689209
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,8,8,128,1,float16,float16,0,0.4358880043029785
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,8,8,128,1,float16,fp8,0,0.43531999588012693
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,8,8,128,1,fp8,fp8,0,0.4369935989379883
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,8,1,128,1,float16,fp8,0,0.17709439992904663
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,8,1,128,1,fp8,fp8,0,0.17758400440216066
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,8,2,128,1,float16,float16,0,0.21121280193328856
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,8,2,128,1,float16,fp8,0,0.21191840171813964
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,8,2,128,1,fp8,fp8,0,0.21240639686584473
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,8,4,128,1,float16,float16,0,0.2866624116897583
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,8,4,128,1,float16,fp8,0,0.2879647970199585
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,8,4,128,1,fp8,fp8,0,0.28712639808654783
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,8,8,128,1,float16,float16,0,0.23062241077423096
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,8,8,128,1,float16,fp8,0,0.22976479530334473
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,8,8,128,1,fp8,fp8,0,0.2295936107635498
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,8,1,128,1,float16,float16,0,0.1000704050064087
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,8,1,128,1,float16,fp8,0,0.10223519802093506
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,8,4,128,1,float16,fp8,0,0.1565343976020813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,8,1,128,1,fp8,fp8,0,0.10143359899520873
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,8,2,128,1,float16,float16,0,0.11860959529876709
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,8,2,128,1,float16,fp8,0,0.1192512035369873
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,8,2,128,1,fp8,fp8,0,0.11938079595565795
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,8,4,128,1,float16,float16,0,0.15544639825820922
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,8,4,128,1,fp8,fp8,0,0.15588639974594115
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,8,8,128,1,float16,float16,0,0.12770559787750244
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,8,8,128,1,float16,fp8,0,0.12719520330429077
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,8,8,128,1,fp8,fp8,0,0.12713279724121093
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,8,1,128,1,float16,float16,0,0.06036800146102905
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,8,1,128,1,float16,fp8,0,0.058006399869918825
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,8,1,128,1,fp8,fp8,0,0.05813599824905395
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,8,2,128,1,float16,float16,0,0.07077280282974244
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,8,2,128,1,float16,fp8,0,0.06678400039672852
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,8,2,128,1,fp8,fp8,0,0.06702079772949218
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,8,4,128,1,float16,float16,0,0.09012799859046935
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,8,4,128,1,float16,fp8,0,0.08710240125656128
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,8,4,128,1,fp8,fp8,0,0.08681920170783997
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,8,8,128,1,float16,float16,0,0.07398399710655212
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,8,8,128,1,float16,fp8,0,0.06807039976119995
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,8,8,128,1,fp8,fp8,0,0.06814399957656861
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,8,1,128,1,float16,float16,0,0.03760960102081299
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,8,1,128,1,float16,fp8,0,0.03914720118045807
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,8,1,128,1,fp8,fp8,0,0.03930239975452423
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,8,2,128,1,float16,float16,0,0.04194400012493134
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,8,2,128,1,float16,fp8,0,0.04397599995136261
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,8,2,128,1,fp8,fp8,0,0.04350399971008301
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,8,4,128,1,float16,float16,0,0.05119199752807617
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,8,4,128,1,float16,fp8,0,0.05117279887199402
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,8,4,128,1,fp8,fp8,0,0.05093600153923035
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,8,8,128,1,float16,float16,0,0.04097760021686554
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,8,8,128,1,float16,fp8,0,0.04211679995059967
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,8,8,128,1,fp8,fp8,0,0.04242559969425201
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,8,1,128,1,float16,float16,0,0.02797600030899048
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,8,1,128,1,float16,fp8,0,0.029385599493980407
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,8,1,128,1,fp8,fp8,0,0.029374399781227113
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,8,2,128,1,float16,float16,0,0.028779199719429015
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,8,2,128,1,float16,fp8,0,0.03052160143852234
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,8,2,128,1,fp8,fp8,0,0.03043999969959259
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,8,4,128,1,float16,float16,0,0.03323040008544922
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,8,4,128,1,float16,fp8,0,0.03526079952716828
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,8,4,128,1,fp8,fp8,0,0.03513759970664978
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,8,8,128,1,float16,float16,0,0.03296000063419342
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,8,8,128,1,float16,fp8,0,0.03485440015792847
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,8,8,128,1,fp8,fp8,0,0.03478400111198425
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,8,1,128,1,float16,float16,0,0.027395200729370118
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,8,1,128,1,float16,fp8,0,0.02911199927330017
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,8,1,128,1,fp8,fp8,0,0.028804799914360045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,8,2,128,1,float16,float16,0,0.027710399031639098
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,8,2,128,1,float16,fp8,0,0.029129600524902342
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,8,2,128,1,fp8,fp8,0,0.02911359965801239
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,8,4,128,1,float16,float16,0,0.02848159968852997
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,8,4,128,1,float16,fp8,0,0.030369600653648375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,8,4,128,1,fp8,fp8,0,0.030296000838279723
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,8,8,128,1,float16,float16,0,0.028071999549865723
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,8,8,128,1,float16,fp8,0,0.02985599935054779
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,8,8,128,1,fp8,fp8,0,0.030103999376296996
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,8,1,128,1,float16,float16,0,0.026414400339126586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,8,1,128,1,float16,fp8,0,0.02866879999637604
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,8,1,128,1,fp8,fp8,0,0.028649601340293884
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,8,2,128,1,float16,float16,0,0.0268640011548996
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,8,2,128,1,float16,fp8,0,0.028299200534820556
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,8,2,128,1,fp8,fp8,0,0.028284800052642823
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,8,4,128,1,float16,float16,0,0.02714880108833313
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,8,4,128,1,float16,fp8,0,0.029067200422286988
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,8,4,128,1,fp8,fp8,0,0.02876160144805908
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,8,1,128,1,float16,float16,0,0.27677600383758544
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,8,1,128,1,float16,fp8,0,0.2859344005584717
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,8,1,128,1,fp8,fp8,0,0.28592638969421386
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,8,2,128,1,float16,float16,0,0.35175840854644774
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,8,2,128,1,float16,fp8,0,0.3544928073883057
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,8,2,128,1,fp8,fp8,0,0.355784010887146
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,8,4,128,1,float16,float16,0,0.5015151977539063
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,8,4,128,1,float16,fp8,0,0.5072751998901367
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,8,4,128,1,fp8,fp8,0,0.5084496021270752
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,8,8,128,1,float16,float16,0,0.41361122131347655
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,8,8,128,1,float16,fp8,0,0.40758237838745115
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,8,1,128,1,float16,float16,0,0.15086719989776612
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,8,8,128,1,fp8,fp8,0,0.4071455955505371
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,8,1,128,1,float16,fp8,0,0.15634560585021973
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,8,1,128,1,fp8,fp8,0,0.1556815981864929
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,8,2,128,1,float16,float16,0,0.1877120018005371
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,8,2,128,1,float16,fp8,0,0.1900431990623474
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,8,2,128,1,fp8,fp8,0,0.19011839628219604
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,8,1,128,1,float16,float16,0,0.08850240111351013
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,8,4,128,1,float16,float16,0,0.2630592107772827
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,8,4,128,1,float16,fp8,0,0.26503040790557864
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,8,4,128,1,fp8,fp8,0,0.26496479511260984
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,8,8,128,1,float16,float16,0,0.21980159282684325
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,8,8,128,1,float16,fp8,0,0.21494240760803224
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,8,8,128,1,fp8,fp8,0,0.21495521068572998
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,8,1,128,1,float16,fp8,0,0.09030399918556213
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,8,1,128,1,fp8,fp8,0,0.0903984010219574
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,8,4,128,1,fp8,fp8,0,0.144758403301239
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,8,2,128,1,float16,float16,0,0.10612640380859376
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,8,2,128,1,float16,fp8,0,0.10754079818725586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,8,2,128,1,fp8,fp8,0,0.10764800310134888
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,8,4,128,1,float16,float16,0,0.1432528018951416
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,8,4,128,1,float16,fp8,0,0.14506560564041138
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,8,8,128,1,float16,float16,0,0.121343994140625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,8,8,128,1,float16,fp8,0,0.11843359470367432
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,8,8,128,1,fp8,fp8,0,0.11845599412918091
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,8,1,128,1,float16,float16,0,0.052369600534439086
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,8,1,128,1,float16,fp8,0,0.05243200063705444
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,8,1,128,1,fp8,fp8,0,0.05200639963150024
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,8,2,128,1,float16,float16,0,0.06410080194473267
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,8,2,128,1,float16,fp8,0,0.05971840023994446
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,8,2,128,1,fp8,fp8,0,0.06077759861946106
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,8,4,128,1,float16,float16,0,0.0832431972026825
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,8,4,128,1,float16,fp8,0,0.08087040185928344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,8,4,128,1,fp8,fp8,0,0.07923679947853088
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,8,8,128,1,float16,float16,0,0.0696560025215149
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,8,8,128,1,float16,fp8,0,0.06152799725532532
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,8,8,128,1,fp8,fp8,0,0.06149119734764099
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,8,1,128,1,float16,float16,0,0.03203839957714081
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,8,1,128,1,float16,fp8,0,0.03424319922924042
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,8,1,128,1,fp8,fp8,0,0.034108799695968625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,8,2,128,1,float16,float16,0,0.03624320030212402
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,8,2,128,1,float16,fp8,0,0.038340801000595094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,8,2,128,1,fp8,fp8,0,0.03814240097999573
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,8,4,128,1,float16,float16,0,0.045070400834083556
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,8,4,128,1,float16,fp8,0,0.0458512008190155
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,8,4,128,1,fp8,fp8,0,0.04567039906978607
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,8,8,128,1,float16,float16,0,0.03778400123119354
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,8,8,128,1,float16,fp8,0,0.03852159976959228
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,8,8,128,1,fp8,fp8,0,0.03864000141620636
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,8,1,128,1,float16,float16,0,0.024908800423145295
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,8,1,128,1,float16,fp8,0,0.025916799902915955
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,8,1,128,1,fp8,fp8,0,0.025918400287628172
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,8,2,128,1,float16,float16,0,0.025887998938560485
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,8,2,128,1,float16,fp8,0,0.02683520019054413
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,8,2,128,1,fp8,fp8,0,0.02653439939022064
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,8,4,128,1,float16,float16,0,0.030184000730514526
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,8,4,128,1,float16,fp8,0,0.031167998909950256
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,8,8,128,1,float16,float16,0,0.03017120063304901
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,8,4,128,1,fp8,fp8,0,0.031083199381828307
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,8,8,128,1,float16,fp8,0,0.03110400140285492
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,8,8,128,1,fp8,fp8,0,0.031118398904800414
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,8,1,128,1,float16,float16,0,0.0242576003074646
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,8,1,128,1,float16,fp8,0,0.024911999702453613
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,8,1,128,1,fp8,fp8,0,0.025374400615692138
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,8,2,128,1,float16,float16,0,0.024590399861335755
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,8,2,128,1,float16,fp8,0,0.025777599215507506
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,8,2,128,1,fp8,fp8,0,0.025377601385116577
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,8,4,128,1,float16,float16,0,0.025510400533676147
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,8,4,128,1,float16,fp8,0,0.026723200082778932
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,8,4,128,1,fp8,fp8,0,0.026761600375175477
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,8,8,128,1,float16,float16,0,0.025353598594665527
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,8,8,128,1,float16,fp8,0,0.02627840042114258
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,8,8,128,1,fp8,fp8,0,0.026131200790405273
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,8,1,128,1,float16,float16,0,0.023940800130367278
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,8,4,128,1,float16,float16,0,0.024456000328063963
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,8,4,128,1,float16,fp8,0,0.025360000133514405
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,8,1,128,1,float16,fp8,0,0.024801599979400634
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,8,8,128,1,float16,fp8,0,0.02380480021238327
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,8,1,128,1,fp8,fp8,0,0.0251008003950119
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,8,2,128,1,float16,float16,0,0.024129599332809448
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,8,2,128,1,float16,fp8,0,0.02494560033082962
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,8,2,128,1,fp8,fp8,0,0.025166401267051698
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,8,4,128,1,fp8,fp8,0,0.0256879985332489
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,8,8,128,1,float16,float16,0,0.022779199481010436
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,8,8,128,1,fp8,fp8,0,0.023715199530124666
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,8,1,128,1,float16,float16,0,0.0222448006272316
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,8,1,128,1,float16,fp8,0,0.023657600581645965
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,8,1,128,1,fp8,fp8,0,0.023545600473880768
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,8,2,128,1,float16,float16,0,0.022188800573349
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,8,2,128,1,float16,fp8,0,0.023529599606990814
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,8,1,128,1,float16,fp8,0,0.15147199630737304
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,8,2,128,1,fp8,fp8,0,0.023523199558258056
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,8,4,128,1,float16,float16,0,0.022475199401378633
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,8,4,128,1,float16,fp8,0,0.023523199558258056
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,8,4,128,1,fp8,fp8,0,0.023324799537658692
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,8,1,128,1,float16,float16,0,0.1427135944366455
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,8,1,128,1,fp8,fp8,0,0.1511296033859253
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,8,2,128,1,float16,float16,0,0.17945120334625245
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,8,4,128,1,fp8,fp8,0,0.25980799198150634
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,8,2,128,1,float16,fp8,0,0.187225604057312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,8,2,128,1,fp8,fp8,0,0.1871392011642456
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,8,4,128,1,float16,float16,0,0.2584320068359375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,8,4,128,1,float16,fp8,0,0.2597264051437378
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,8,8,128,1,float16,float16,0,0.21901121139526367
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,8,8,128,1,float16,fp8,0,0.2113231897354126
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,8,8,128,1,fp8,fp8,0,0.21161119937896727
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,8,1,128,1,float16,float16,0,0.08407520055770874
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,8,4,128,1,float16,float16,0,0.1396000027656555
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,8,1,128,1,float16,fp8,0,0.08648959994316101
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,8,1,128,1,fp8,fp8,0,0.08656799793243408
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,8,2,128,1,float16,float16,0,0.10336159467697144
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,8,2,128,1,float16,fp8,0,0.10453599691390991
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,8,2,128,1,fp8,fp8,0,0.10420800447463989
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,8,4,128,1,float16,fp8,0,0.13963199853897096
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,8,4,128,1,fp8,fp8,0,0.1393072009086609
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,8,8,128,1,float16,float16,0,0.11879040002822876
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,8,8,128,1,float16,fp8,0,0.1146016001701355
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,8,8,128,1,fp8,fp8,0,0.11486719846725464
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,8,1,128,1,float16,float16,0,0.048923200368881224
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,8,1,128,1,float16,fp8,0,0.04748800098896026
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,8,1,128,1,fp8,fp8,0,0.04750880002975464
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,8,2,128,1,float16,float16,0,0.06135680079460144
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,8,2,128,1,float16,fp8,0,0.054711997509002686
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,8,2,128,1,fp8,fp8,0,0.05613759756088257
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,8,4,128,1,float16,float16,0,0.08126879930496216
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,8,4,128,1,float16,fp8,0,0.07558879852294922
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,8,4,128,1,fp8,fp8,0,0.07512159943580628
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,8,8,128,1,float16,float16,0,0.06727520227432252
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,8,8,128,1,float16,fp8,0,0.05780320167541504
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,8,8,128,1,fp8,fp8,0,0.05787680149078369
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,8,1,128,1,float16,float16,0,0.02933120131492615
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,8,1,128,1,float16,fp8,0,0.030689600110054015
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,8,1,128,1,fp8,fp8,0,0.03065440058708191
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,8,2,128,1,float16,float16,0,0.03636960089206696
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,8,2,128,1,float16,fp8,0,0.034934398531913755
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,8,8,128,1,float16,float16,0,0.03660480082035065
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,8,2,128,1,fp8,fp8,0,0.03487519919872284
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,8,4,128,1,float16,float16,0,0.04300160109996796
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,8,4,128,1,float16,fp8,0,0.042208001017570496
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,8,4,128,1,fp8,fp8,0,0.04232800006866455
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,8,8,128,1,float16,fp8,0,0.036985599994659425
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,8,8,128,1,fp8,fp8,0,0.036595198512077334
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,8,1,128,1,float16,float16,0,0.023524799942970277
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,8,1,128,1,float16,fp8,0,0.024105599522590636
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,8,1,128,1,fp8,fp8,0,0.02396800071001053
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,8,2,128,1,float16,float16,0,0.024291199445724488
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,8,2,128,1,float16,fp8,0,0.02521919906139374
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,8,2,128,1,fp8,fp8,0,0.02526560127735138
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,8,4,128,1,float16,float16,0,0.02889760136604309
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,8,4,128,1,float16,fp8,0,0.029679998755455017
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,8,4,128,1,fp8,fp8,0,0.029582399129867553
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,8,8,128,1,float16,float16,0,0.028300800919532777
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,8,8,128,1,float16,fp8,0,0.029049599170684816
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,8,8,128,1,fp8,fp8,0,0.0290336012840271
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,8,1,128,1,float16,float16,0,0.02285120040178299
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,8,1,128,1,float16,fp8,0,0.023315200209617616
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,8,1,128,1,fp8,fp8,0,0.023416000604629516
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,8,2,128,1,float16,float16,0,0.023052799701690673
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,8,2,128,1,float16,fp8,0,0.023528000712394713
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,8,2,128,1,fp8,fp8,0,0.023686400055885314
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,8,4,128,1,float16,float16,0,0.02393600046634674
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,8,4,128,1,float16,fp8,0,0.024699200689792634
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,8,4,128,1,fp8,fp8,0,0.02463199943304062
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,8,8,128,1,float16,float16,0,0.02401120066642761
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,8,8,128,1,float16,fp8,0,0.02433439940214157
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,8,8,128,1,fp8,fp8,0,0.024135999381542206
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,8,1,128,1,float16,float16,0,0.022407999634742735
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,8,1,128,1,fp8,fp8,0,0.02311040014028549
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,8,1,128,1,float16,fp8,0,0.023083199560642243
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,8,2,128,1,float16,float16,0,0.022283199429512023
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,8,2,128,1,float16,fp8,0,0.022832000255584718
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,8,2,128,1,fp8,fp8,0,0.023628799617290495
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,8,4,128,1,float16,float16,0,0.022867199778556824
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,8,4,128,1,float16,fp8,0,0.02332960069179535
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,8,4,128,1,fp8,fp8,0,0.023443199694156647
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,8,8,128,1,float16,float16,0,0.02157599925994873
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,8,8,128,1,float16,fp8,0,0.02200160026550293
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,8,8,128,1,fp8,fp8,0,0.021881599724292756
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,8,1,128,1,float16,float16,0,0.020868800580501556
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,8,1,128,1,float16,fp8,0,0.021460799872875212
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,8,1,128,1,fp8,fp8,0,0.021484799683094025
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,8,2,128,1,float16,float16,0,0.02077919989824295
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,8,2,128,1,float16,fp8,0,0.021704000234603883
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,8,2,128,1,fp8,fp8,0,0.02174399942159653
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,8,4,128,1,float16,float16,0,0.021270400285720824
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,8,4,128,1,float16,fp8,0,0.02183839976787567
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,8,4,128,1,fp8,fp8,0,0.02152319997549057
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,8,8,128,1,float16,float16,0,0.020129600167274476
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,8,8,128,1,float16,fp8,0,0.02093919962644577
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,8,8,128,1,fp8,fp8,0,0.020729599893093108
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,8,1,128,1,float16,float16,0,0.019883200526237488
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,8,1,128,1,float16,fp8,0,0.020552000403404234
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,8,1,128,1,fp8,fp8,0,0.02075359970331192
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,8,2,128,1,float16,float16,0,0.019977599382400513
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,8,2,128,1,float16,fp8,0,0.020689600706100465
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,8,2,128,1,fp8,fp8,0,0.020708799362182617
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,8,4,128,1,float16,float16,0,0.020179200172424316
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,8,4,128,1,float16,fp8,0,0.020571200549602507
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,8,4,128,1,fp8,fp8,0,0.020854400098323823
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,8,1,128,1,float16,float16,0,0.08551520109176636
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,8,1,128,1,float16,fp8,0,0.0878928005695343
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,8,1,128,1,fp8,fp8,0,0.08792160153388977
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,8,2,128,1,float16,float16,0,0.10399680137634278
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,8,2,128,1,float16,fp8,0,0.10598880052566528
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,8,2,128,1,fp8,fp8,0,0.10579999685287475
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,8,8,128,1,fp8,fp8,0,0.1329184055328369
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,8,4,128,1,float16,float16,0,0.14183520078659057
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,8,4,128,1,float16,fp8,0,0.1409152030944824
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,8,4,128,1,fp8,fp8,0,0.14094879627227783
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,8,8,128,1,float16,float16,0,0.13421599864959716
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,8,8,128,1,float16,fp8,0,0.13337279558181764
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,8,1,128,1,float16,float16,0,0.049351999163627626
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,8,1,128,1,float16,fp8,0,0.04783360064029694
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,8,1,128,1,fp8,fp8,0,0.047593599557876586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,8,2,128,1,float16,float16,0,0.06083199977874756
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,8,2,128,1,float16,fp8,0,0.05691199898719788
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,8,2,128,1,fp8,fp8,0,0.057364797592163085
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,8,4,128,1,float16,float16,0,0.08142560124397277
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,8,4,128,1,float16,fp8,0,0.07596799731254578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,8,4,128,1,fp8,fp8,0,0.07646080255508422
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,8,8,128,1,float16,float16,0,0.07410240173339844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,8,8,128,1,float16,fp8,0,0.06707839965820313
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,8,8,128,1,fp8,fp8,0,0.06699039936065673
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,8,2,128,1,fp8,fp8,0,0.03547520041465759
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,8,1,128,1,float16,float16,0,0.029763200879096986
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,8,1,128,1,float16,fp8,0,0.031116798520088196
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,8,1,128,1,fp8,fp8,0,0.03144479990005493
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,8,2,128,1,float16,float16,0,0.03436320126056671
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,8,2,128,1,float16,fp8,0,0.035416001081466676
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,8,4,128,1,float16,float16,0,0.0423440009355545
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,8,4,128,1,float16,fp8,0,0.04306240081787109
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,8,4,128,1,fp8,fp8,0,0.04270719885826111
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,8,8,128,1,float16,float16,0,0.04014880061149597
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,8,8,128,1,float16,fp8,0,0.041526401042938234
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,8,8,128,1,fp8,fp8,0,0.041555199027061465
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,8,1,128,1,float16,float16,0,0.023812800645828247
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,8,1,128,1,float16,fp8,0,0.024435199797153473
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,8,1,128,1,fp8,fp8,0,0.02464960068464279
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,8,2,128,1,float16,float16,0,0.025016000866889952
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,8,2,128,1,float16,fp8,0,0.025646400451660157
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,8,2,128,1,fp8,fp8,0,0.025879999995231627
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,8,4,128,1,float16,float16,0,0.029385599493980407
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,8,4,128,1,float16,fp8,0,0.029862400889396668
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,8,4,128,1,fp8,fp8,0,0.029879999160766602
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,8,8,128,1,float16,float16,0,0.02810719907283783
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,8,8,128,1,float16,fp8,0,0.02903040051460266
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,8,8,128,1,fp8,fp8,0,0.029016000032424927
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,8,1,128,1,float16,float16,0,0.02251359969377518
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,8,1,128,1,float16,fp8,0,0.023367999494075774
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,8,1,128,1,fp8,fp8,0,0.02344159930944443
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,8,2,128,1,float16,float16,0,0.02279839962720871
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,8,2,128,1,float16,fp8,0,0.02386240065097809
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,8,2,128,1,fp8,fp8,0,0.023625600337982177
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,8,4,128,1,float16,float16,0,0.023892800509929656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,8,4,128,1,float16,fp8,0,0.024668799340724946
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,8,4,128,1,fp8,fp8,0,0.024868799746036528
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,8,8,128,1,float16,float16,0,0.023683199286460878
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,8,8,128,1,float16,fp8,0,0.024459199607372285
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,8,8,128,1,fp8,fp8,0,0.02420320063829422
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,8,1,128,1,float16,float16,0,0.022439999878406523
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,8,1,128,1,float16,fp8,0,0.023182399570941925
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,8,1,128,1,fp8,fp8,0,0.023396800458431243
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,8,2,128,1,float16,float16,0,0.022806400060653688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,8,2,128,1,float16,fp8,0,0.023656000196933747
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,8,2,128,1,fp8,fp8,0,0.02356480062007904
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,8,4,128,1,float16,float16,0,0.022921599447727203
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,8,4,128,1,float16,fp8,0,0.023479999601840974
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,8,4,128,1,fp8,fp8,0,0.023576000332832338
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,8,8,128,1,float16,float16,0,0.02143840044736862
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,8,8,128,1,float16,fp8,0,0.021876800060272216
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,8,8,128,1,fp8,fp8,0,0.02212799936532974
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,8,1,128,1,float16,float16,0,0.02074880003929138
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,8,1,128,1,float16,fp8,0,0.021692800521850585
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,8,1,128,1,fp8,fp8,0,0.021457600593566894
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,8,2,128,1,float16,float16,0,0.020942400395870208
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,8,2,128,1,float16,fp8,0,0.02184160053730011
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,8,2,128,1,fp8,fp8,0,0.02187040001153946
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,8,4,128,1,float16,float16,0,0.020934399962425233
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,8,4,128,1,float16,fp8,0,0.02197439968585968
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,8,4,128,1,fp8,fp8,0,0.021804800629615782
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,8,8,128,1,float16,float16,0,0.020113599300384522
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,8,8,128,1,float16,fp8,0,0.020683200657367708
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,8,8,128,1,fp8,fp8,0,0.0208624005317688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,8,1,128,1,float16,float16,0,0.01996160000562668
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,8,1,128,1,float16,fp8,0,0.02067359983921051
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,8,1,128,1,fp8,fp8,0,0.020635199546813966
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,8,2,128,1,float16,float16,0,0.019811199605464937
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,8,2,128,1,float16,fp8,0,0.020822399854660036
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,8,2,128,1,fp8,fp8,0,0.02060000002384186
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,8,4,128,1,float16,float16,0,0.019971199333667755
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,8,4,128,1,float16,fp8,0,0.020878399908542632
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,8,4,128,1,fp8,fp8,0,0.020876799523830415
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,8,8,128,1,float16,float16,0,0.019699199497699736
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,8,8,128,1,float16,fp8,0,0.02041279971599579
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,8,8,128,1,fp8,fp8,0,0.0204352006316185
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,8,1,128,1,float16,float16,0,0.0197952002286911
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,8,1,128,1,float16,fp8,0,0.02035039961338043
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,8,1,128,1,fp8,fp8,0,0.020263999700546265
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,8,2,128,1,float16,float16,0,0.019636799395084382
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,8,2,128,1,float16,fp8,0,0.020630399882793426
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,8,2,128,1,fp8,fp8,0,0.020598399639129638
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,8,4,128,1,float16,float16,0,0.019568000733852387
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,8,4,128,1,float16,fp8,0,0.02059520035982132
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,8,4,128,1,fp8,fp8,0,0.020692799985408784
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,8,1,128,1,float16,float16,0,0.052086400985717776
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,8,1,128,1,float16,fp8,0,0.049859198927879336
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,8,1,128,1,fp8,fp8,0,0.05076799988746643
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,8,2,128,1,float16,float16,0,0.06285120248794555
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,8,2,128,1,float16,fp8,0,0.058374398946762086
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,8,2,128,1,fp8,fp8,0,0.05755199790000916
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,8,4,128,1,float16,float16,0,0.09698399901390076
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,8,4,128,1,float16,fp8,0,0.09623039960861206
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,8,4,128,1,fp8,fp8,0,0.09592000246047974
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,8,8,128,1,float16,float16,0,0.09026560187339783
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,8,8,128,1,float16,fp8,0,0.08600800037384033
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,8,8,128,1,fp8,fp8,0,0.08603519797325135
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,8,1,128,1,float16,float16,0,0.030564799904823303
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,8,1,128,1,float16,fp8,0,0.03219519853591919
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,8,4,128,1,float16,fp8,0,0.05288959741592407
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,8,1,128,1,fp8,fp8,0,0.03228319883346557
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,8,2,128,1,float16,float16,0,0.035278400778770445
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,8,2,128,1,float16,fp8,0,0.0366703987121582
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,8,2,128,1,fp8,fp8,0,0.036494401097297666
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,8,4,128,1,float16,float16,0,0.05063199996948242
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,8,4,128,1,fp8,fp8,0,0.052537602186203
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,8,8,128,1,float16,float16,0,0.04808480143547058
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,8,8,128,1,float16,fp8,0,0.05084159970283508
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,8,8,128,1,fp8,fp8,0,0.05087360143661499
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,8,1,128,1,float16,float16,0,0.02476319968700409
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,8,1,128,1,float16,fp8,0,0.02510879933834076
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,8,1,128,1,fp8,fp8,0,0.0251120001077652
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,8,2,128,1,float16,float16,0,0.025630399584770203
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,8,2,128,1,float16,fp8,0,0.026233598589897156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,8,2,128,1,fp8,fp8,0,0.02619360089302063
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,8,4,128,1,float16,float16,0,0.033199998736381534
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,8,4,128,1,float16,fp8,0,0.03503359854221344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,8,4,128,1,fp8,fp8,0,0.034852799773216245
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,8,8,128,1,float16,float16,0,0.03228960037231445
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,8,8,128,1,float16,fp8,0,0.03376320004463196
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,8,8,128,1,fp8,fp8,0,0.03394240140914917
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,8,2,128,1,float16,fp8,0,0.02407840043306351
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,8,1,128,1,float16,float16,0,0.022814400494098663
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,8,1,128,1,float16,fp8,0,0.023878400027751923
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,8,1,128,1,fp8,fp8,0,0.023732799291610717
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,8,2,128,1,float16,float16,0,0.02330559939146042
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,8,2,128,1,fp8,fp8,0,0.024275200068950654
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,8,4,128,1,float16,float16,0,0.024201600253582
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,8,4,128,1,float16,fp8,0,0.024984000623226164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,8,4,128,1,fp8,fp8,0,0.02497279942035675
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,8,8,128,1,float16,float16,0,0.023992000520229338
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,8,8,128,1,float16,fp8,0,0.02452960014343262
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,8,8,128,1,fp8,fp8,0,0.024644799530506134
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,8,1,128,1,float16,float16,0,0.02253119945526123
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,8,1,128,1,float16,fp8,0,0.02332320064306259
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,8,1,128,1,fp8,fp8,0,0.02306720018386841
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,8,2,128,1,float16,float16,0,0.022860799729824067
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,8,2,128,1,fp8,fp8,0,0.02348479926586151
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,8,2,128,1,float16,fp8,0,0.023287999629974365
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,8,4,128,1,float16,float16,0,0.022761599719524385
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,8,4,128,1,float16,fp8,0,0.02356960028409958
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,8,4,128,1,fp8,fp8,0,0.023238399624824525
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,8,8,128,1,float16,float16,0,0.021587200462818146
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,8,8,128,1,float16,fp8,0,0.022364799678325654
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,8,8,128,1,fp8,fp8,0,0.022463999688625336
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,8,1,128,1,float16,float16,0,0.02091200053691864
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,8,1,128,1,float16,fp8,0,0.021849599480628968
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,8,1,128,1,fp8,fp8,0,0.021691200137138367
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,8,2,128,1,float16,float16,0,0.021073600649833678
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,8,2,128,1,float16,fp8,0,0.021830399334430695
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,8,2,128,1,fp8,fp8,0,0.0217631995677948
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,8,4,128,1,float16,float16,0,0.021030400693416596
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,8,4,128,1,float16,fp8,0,0.022337600588798523
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,8,4,128,1,fp8,fp8,0,0.02200320065021515
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,8,2,128,1,float16,float16,0,0.0198512002825737
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,8,8,128,1,float16,float16,0,0.020126399397850037
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,8,8,128,1,float16,fp8,0,0.02125120013952255
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,8,8,128,1,fp8,fp8,0,0.02094080001115799
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,8,1,128,1,float16,float16,0,0.019977599382400513
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,8,1,128,1,float16,fp8,0,0.020817600190639496
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,8,1,128,1,fp8,fp8,0,0.020505599677562714
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,8,2,128,1,float16,fp8,0,0.020659199357032774
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,8,2,128,1,fp8,fp8,0,0.020553599298000335
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,8,4,128,1,float16,float16,0,0.01987359970808029
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,8,4,128,1,float16,fp8,0,0.021113599836826324
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,8,4,128,1,fp8,fp8,0,0.020664000511169435
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,8,8,128,1,float16,float16,0,0.019823999702930452
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,8,8,128,1,float16,fp8,0,0.020694400370121
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,8,8,128,1,fp8,fp8,0,0.020239999890327452
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,8,1,128,1,float16,float16,0,0.019617600739002226
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,8,1,128,1,float16,fp8,0,0.02038400024175644
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,8,1,128,1,fp8,fp8,0,0.02048639953136444
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,8,2,128,1,float16,float16,0,0.019735999405384064
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,8,2,128,1,float16,fp8,0,0.02030239999294281
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,8,2,128,1,fp8,fp8,0,0.02060000002384186
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,8,4,128,1,float16,float16,0,0.019652800261974336
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,8,4,128,1,float16,fp8,0,0.020046399533748628
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,8,4,128,1,fp8,fp8,0,0.02030559927225113
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,8,8,128,1,float16,float16,0,0.01950719952583313
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,8,8,128,1,float16,fp8,0,0.02017119973897934
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,8,8,128,1,fp8,fp8,0,0.020550400018692017
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,8,1,128,1,float16,float16,0,0.01919199973344803
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,8,1,128,1,float16,fp8,0,0.02040800005197525
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,8,1,128,1,fp8,fp8,0,0.020396800339221956
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,8,2,128,1,float16,float16,0,0.01931840032339096
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,8,2,128,1,float16,fp8,0,0.020127999782562255
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,8,2,128,1,fp8,fp8,0,0.020231999456882477
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,8,4,128,1,float16,float16,0,0.019480000436306
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,8,4,128,1,float16,fp8,0,0.02043839991092682
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,8,4,128,1,fp8,fp8,0,0.020126399397850037
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,8,1,128,1,float16,float16,0,0.03284479975700379
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,8,1,128,1,float16,fp8,0,0.034436801075935365
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,8,1,128,1,fp8,fp8,0,0.03461920022964478
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,8,2,128,1,float16,float16,0,0.044331198930740355
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,8,2,128,1,float16,fp8,0,0.04758720099925995
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,8,2,128,1,fp8,fp8,0,0.047958400845527646
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,8,4,128,1,float16,float16,0,0.06736479997634888
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,8,4,128,1,float16,fp8,0,0.07277920246124267
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,8,4,128,1,fp8,fp8,0,0.0724016010761261
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,8,8,128,1,float16,float16,0,0.06422240138053895
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,8,8,128,1,float16,fp8,0,0.06987040042877198
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,8,8,128,1,fp8,fp8,0,0.06986399888992309
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,8,1,128,1,float16,float16,0,0.026038399338722228
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,8,1,128,1,float16,fp8,0,0.026612800359725953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,8,1,128,1,fp8,fp8,0,0.026950401067733765
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,8,2,128,1,float16,float16,0,0.030126398801803587
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,8,2,128,1,float16,fp8,0,0.03169440031051636
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,8,2,128,1,fp8,fp8,0,0.03175680041313171
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,8,4,128,1,float16,float16,0,0.04194880127906799
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,8,4,128,1,float16,fp8,0,0.044993600249290465
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,8,4,128,1,fp8,fp8,0,0.04512960016727448
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,8,8,128,1,float16,float16,0,0.040136000514030455
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,8,8,128,1,float16,fp8,0,0.04375520050525665
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,8,8,128,1,fp8,fp8,0,0.043459200859069826
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,8,1,128,1,float16,float16,0,0.02345760017633438
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,8,1,128,1,float16,fp8,0,0.024484799802303316
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,8,1,128,1,fp8,fp8,0,0.02494720071554184
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,8,2,128,1,float16,float16,0,0.023688000440597535
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,8,2,128,1,float16,fp8,0,0.02468640059232712
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,8,8,128,1,float16,float16,0,0.027608001232147218
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,8,2,128,1,fp8,fp8,0,0.024702399969100952
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,8,4,128,1,float16,float16,0,0.028683200478553772
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,8,4,128,1,float16,fp8,0,0.030079999566078187
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,8,4,128,1,fp8,fp8,0,0.03007200062274933
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,8,8,128,1,float16,fp8,0,0.029388800263404846
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,8,8,128,1,fp8,fp8,0,0.02954559922218323
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,8,1,128,1,float16,float16,0,0.022761599719524385
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,8,1,128,1,float16,fp8,0,0.023947200179100035
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,8,1,128,1,fp8,fp8,0,0.024292799830436706
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,8,2,128,1,float16,float16,0,0.022808000445365906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,8,2,128,1,float16,fp8,0,0.02374880015850067
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,8,2,128,1,fp8,fp8,0,0.02381120026111603
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,8,4,128,1,float16,float16,0,0.023358400166034698
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,8,4,128,1,float16,fp8,0,0.02396959960460663
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,8,4,128,1,fp8,fp8,0,0.02404319941997528
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,8,8,128,1,float16,float16,0,0.021712000668048858
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,8,8,128,1,float16,fp8,0,0.02248319983482361
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,8,8,128,1,fp8,fp8,0,0.022427199780941008
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,8,1,128,1,float16,float16,0,0.02131199985742569
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,8,1,128,1,float16,fp8,0,0.022023999691009523
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,8,1,128,1,fp8,fp8,0,0.021996800601482392
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,8,2,128,1,float16,float16,0,0.020873600244522096
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,8,2,128,1,float16,fp8,0,0.02178879976272583
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,8,2,128,1,fp8,fp8,0,0.021804800629615782
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,8,4,128,1,float16,float16,0,0.021353599429130555
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,8,4,128,1,float16,fp8,0,0.021928000450134277
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,8,4,128,1,fp8,fp8,0,0.022144000232219695
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,8,8,128,1,float16,float16,0,0.02038239985704422
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,8,8,128,1,float16,fp8,0,0.021374399960041045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,8,8,128,1,fp8,fp8,0,0.02162719964981079
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,8,1,128,1,float16,float16,0,0.01996160000562668
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,8,1,128,1,float16,fp8,0,0.0208064004778862
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,8,1,128,1,fp8,fp8,0,0.0208624005317688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,8,2,128,1,float16,float16,0,0.019969600439071655
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,8,2,128,1,float16,fp8,0,0.020644800364971162
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,8,2,128,1,fp8,fp8,0,0.020891200006008147
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,8,4,128,1,float16,float16,0,0.020452800393104553
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,8,4,128,1,float16,fp8,0,0.02130240052938461
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,8,4,128,1,fp8,fp8,0,0.021139200031757354
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,8,8,128,1,float16,float16,0,0.02008160054683685
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,8,8,128,1,float16,fp8,0,0.020843200385570526
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,8,2,128,1,float16,fp8,0,0.02008800059556961
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,8,2,128,1,fp8,fp8,0,0.020289599895477295
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,8,8,128,1,fp8,fp8,0,0.02080959975719452
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,8,1,128,1,float16,float16,0,0.01942239999771118
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,8,1,128,1,float16,fp8,0,0.02051839977502823
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,8,1,128,1,fp8,fp8,0,0.020598399639129638
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,8,2,128,1,float16,float16,0,0.019601599872112276
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,8,4,128,1,float16,float16,0,0.01947679966688156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,8,4,128,1,float16,fp8,0,0.020374399423599244
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,8,4,128,1,fp8,fp8,0,0.02028000056743622
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,8,8,128,1,float16,float16,0,0.01955839991569519
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,8,8,128,1,float16,fp8,0,0.020270399749279022
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,8,8,128,1,fp8,fp8,0,0.020399999618530274
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,8,1,128,1,float16,float16,0,0.019393600523471832
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,8,1,128,1,float16,fp8,0,0.02027200013399124
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,8,1,128,1,fp8,fp8,0,0.020175999402999877
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,8,2,128,1,float16,float16,0,0.019193600118160247
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,8,2,128,1,float16,fp8,0,0.020235200226306916
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,8,2,128,1,fp8,fp8,0,0.01993599981069565
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,8,4,128,1,float16,float16,0,0.019276799261569978
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,8,4,128,1,float16,fp8,0,0.020095999538898467
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,8,4,128,1,fp8,fp8,0,0.0201664000749588
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,8,8,128,1,float16,float16,0,0.019359999895095827
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,8,8,128,1,float16,fp8,0,0.020180800557136537
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,8,8,128,1,fp8,fp8,0,0.020108799636363982
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,8,1,128,1,float16,float16,0,0.019284799695014954
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,8,1,128,1,float16,fp8,0,0.02007199972867966
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,8,1,128,1,fp8,fp8,0,0.02001120001077652
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,8,2,128,1,float16,float16,0,0.01897920072078705
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,8,2,128,1,fp8,fp8,0,0.019852800667285918
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,8,2,128,1,float16,fp8,0,0.020076799392700195
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,8,4,128,1,float16,float16,0,0.019283199310302736
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,8,4,128,1,float16,fp8,0,0.020027199387550355
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,8,4,128,1,fp8,fp8,0,0.02024320065975189
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,8,1,128,1,float16,float16,0,0.029462400078773498
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,8,1,128,1,float16,fp8,0,0.03144319951534271
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,8,1,128,1,fp8,fp8,0,0.031492799520492554
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,8,2,128,1,float16,fp8,0,0.040043199062347413
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,8,2,128,1,float16,float16,0,0.03746879994869232
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,8,2,128,1,fp8,fp8,0,0.04036319851875305
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,8,4,128,1,float16,float16,0,0.05189759731292724
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,8,4,128,1,float16,fp8,0,0.05818719863891601
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,8,4,128,1,fp8,fp8,0,0.057915198802948
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,8,8,128,1,float16,float16,0,0.04831359982490539
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,8,8,128,1,float16,fp8,0,0.055086398124694826
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,8,8,128,1,fp8,fp8,0,0.054958397150039674
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,8,1,128,1,float16,float16,0,0.02276480048894882
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,8,1,128,1,float16,fp8,0,0.023659199476242065
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,8,1,128,1,fp8,fp8,0,0.023414400219917298
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,8,2,128,1,float16,float16,0,0.026704001426696777
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,8,2,128,1,float16,fp8,0,0.02794240117073059
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,8,2,128,1,fp8,fp8,0,0.028097599744796753
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,8,4,128,1,float16,float16,0,0.03403519988059998
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,8,4,128,1,float16,fp8,0,0.03744640052318573
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,8,4,128,1,fp8,fp8,0,0.037115201354026794
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,8,8,128,1,float16,float16,0,0.03259040117263794
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,8,8,128,1,float16,fp8,0,0.03594720065593719
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,8,8,128,1,fp8,fp8,0,0.0358240008354187
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,8,1,128,1,float16,float16,0,0.020839999616146087
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,8,1,128,1,float16,fp8,0,0.02175839990377426
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,8,1,128,1,fp8,fp8,0,0.02211360037326813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,8,2,128,1,float16,float16,0,0.021139200031757354
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,8,2,128,1,float16,fp8,0,0.022014400362968443
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,8,2,128,1,fp8,fp8,0,0.022251200675964356
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,8,4,128,1,float16,float16,0,0.025084799528121947
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,8,4,128,1,float16,fp8,0,0.026545599102973938
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,8,4,128,1,fp8,fp8,0,0.02656959891319275
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,8,8,128,1,float16,float16,0,0.02422720044851303
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,8,8,128,1,float16,fp8,0,0.026144000887870788
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,8,8,128,1,fp8,fp8,0,0.026121601462364197
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,8,1,128,1,float16,float16,0,0.019993600249290467
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,8,1,128,1,float16,fp8,0,0.02080159932374954
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,8,1,128,1,fp8,fp8,0,0.02082560062408447
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,8,2,128,1,float16,float16,0,0.02008800059556961
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,8,2,128,1,float16,fp8,0,0.02099359929561615
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,8,2,128,1,fp8,fp8,0,0.021180799603462218
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,8,4,128,1,float16,float16,0,0.020483200252056123
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,8,4,128,1,float16,fp8,0,0.021692800521850585
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,8,4,128,1,fp8,fp8,0,0.0213359996676445
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,8,8,128,1,float16,float16,0,0.020057600736618043
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,8,8,128,1,float16,fp8,0,0.021048000454902648
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,8,8,128,1,fp8,fp8,0,0.021012799441814424
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,8,1,128,1,float16,float16,0,0.019555200636386872
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,8,1,128,1,float16,fp8,0,0.02006720006465912
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,8,1,128,1,fp8,fp8,0,0.0203792005777359
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,8,2,128,1,float16,float16,0,0.01952800005674362
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,8,2,128,1,float16,fp8,0,0.020233599841594695
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,8,2,128,1,fp8,fp8,0,0.020239999890327452
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,8,4,128,1,float16,float16,0,0.019817599654197694
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,8,4,128,1,float16,fp8,0,0.020742399990558623
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,8,4,128,1,fp8,fp8,0,0.020664000511169435
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,8,8,128,1,float16,float16,0,0.019972799718379973
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,8,8,128,1,float16,fp8,0,0.020521600544452668
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,8,8,128,1,fp8,fp8,0,0.020436799526214598
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,8,1,128,1,float16,float16,0,0.019208000600337984
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,8,1,128,1,float16,fp8,0,0.02003840059041977
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,8,1,128,1,fp8,fp8,0,0.020076799392700195
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,8,2,128,1,float16,float16,0,0.01947360038757324
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,8,2,128,1,float16,fp8,0,0.01995680034160614
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,8,2,128,1,fp8,fp8,0,0.0203792005777359
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,8,4,128,1,float16,float16,0,0.019494399428367615
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,8,4,128,1,float16,fp8,0,0.020393599569797517
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,8,4,128,1,fp8,fp8,0,0.020584000647068022
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,8,8,128,1,float16,float16,0,0.019313600659370423
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,8,8,128,1,float16,fp8,0,0.020377600193023683
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,8,8,128,1,fp8,fp8,0,0.020047999918460846
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,8,1,128,1,float16,float16,0,0.018831999599933626
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,8,1,128,1,float16,fp8,0,0.01961120069026947
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,8,1,128,1,fp8,fp8,0,0.01958400011062622
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,8,2,128,1,float16,float16,0,0.018995200097560883
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,8,2,128,1,float16,fp8,0,0.020132799446582795
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,8,2,128,1,fp8,fp8,0,0.019628800451755524
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,8,4,128,1,float16,float16,0,0.019419200718402863
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,8,4,128,1,float16,fp8,0,0.019897599518299103
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,8,4,128,1,fp8,fp8,0,0.019875200092792512
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,8,8,128,1,float16,float16,0,0.019529600441455842
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,8,8,128,1,float16,fp8,0,0.019790400564670563
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,8,8,128,1,fp8,fp8,0,0.01977439969778061
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,8,1,128,1,float16,float16,0,0.017083199322223665
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,8,1,128,1,float16,fp8,0,0.01777600049972534
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,8,1,128,1,fp8,fp8,0,0.018156799674034118
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,8,2,128,1,float16,float16,0,0.0189968004822731
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,8,2,128,1,float16,fp8,0,0.019734400510787963
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,8,2,128,1,fp8,fp8,0,0.019900800287723543
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,8,4,128,1,float16,float16,0,0.01912959963083267
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,8,4,128,1,float16,fp8,0,0.019494399428367615
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,8,4,128,1,fp8,fp8,0,0.019947199523448943
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,8,8,128,1,float16,float16,0,0.018729600310325622
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,8,8,128,1,float16,fp8,0,0.01973759979009628
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,8,8,128,1,fp8,fp8,0,0.019601599872112276
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,8,1,128,1,float16,float16,0,0.016300800442695617
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,8,1,128,1,float16,fp8,0,0.017155200242996216
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,8,1,128,1,fp8,fp8,0,0.017027199268341064
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,8,2,128,1,float16,float16,0,0.016892799735069276
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,8,2,128,1,float16,fp8,0,0.017790399491786957
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,8,2,128,1,fp8,fp8,0,0.017825600504875184
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,8,4,128,1,float16,float16,0,0.01884319931268692
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,8,4,128,1,float16,fp8,0,0.019415999948978423
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,8,4,128,1,fp8,fp8,0,0.0195375993847847
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,4,1,128,1,float16,fp8,0,3.2901840209960938
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,4,1,128,1,fp8,fp8,0,3.2907886505126953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,4,1,128,1,float16,float16,0,3.7173648834228517
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,4,4,128,1,float16,float16,0,2.0441856384277344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,4,2,128,1,float16,fp8,0,3.4252479553222654
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,4,4,128,1,float16,fp8,0,1.896419143676758
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,4,2,128,1,float16,float16,0,3.815460968017578
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,4,2,128,1,fp8,fp8,0,3.426252746582031
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,4,4,128,1,fp8,fp8,0,1.8968799591064454
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,4,1,128,1,float16,float16,0,1.7983552932739257
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,4,1,128,1,float16,fp8,0,1.8207759857177734
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,4,1,128,1,fp8,fp8,0,1.7540367126464844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,4,2,128,1,float16,float16,0,1.8756784439086913
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,4,2,128,1,float16,fp8,0,1.7320463180541992
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,4,4,128,1,float16,float16,0,1.0175104141235352
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,4,2,128,1,fp8,fp8,0,1.817945671081543
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,4,4,128,1,float16,fp8,0,1.015670394897461
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,4,4,128,1,fp8,fp8,0,0.984124755859375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,4,1,128,1,float16,float16,0,0.9026752471923828
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,4,1,128,1,float16,fp8,0,0.9260000228881836
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,4,1,128,1,fp8,fp8,0,0.8507935523986816
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,4,2,128,1,float16,float16,0,0.9432736396789551
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,4,2,128,1,fp8,fp8,0,0.8969856262207031
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,4,4,128,1,float16,float16,0,0.5196239948272705
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,4,2,128,1,float16,fp8,0,0.8899680137634277
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,4,4,128,1,float16,fp8,0,0.5064239978790284
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,4,4,128,1,fp8,fp8,0,0.5116735935211182
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,4,1,128,1,float16,float16,0,0.4533440113067627
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,4,1,128,1,float16,fp8,0,0.4464303970336914
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,4,1,128,1,fp8,fp8,0,0.4334432125091553
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,4,2,128,1,float16,float16,0,0.48122081756591795
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,4,2,128,1,float16,fp8,0,0.45481438636779786
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,4,2,128,1,fp8,fp8,0,0.44930081367492675
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,4,1,128,1,float16,float16,0,2.0796064376831054
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,4,1,128,1,float16,fp8,0,1.929267120361328
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,4,1,128,1,fp8,fp8,0,1.9321456909179688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,4,4,128,1,float16,float16,0,1.2530768394470215
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,4,2,128,1,float16,float16,0,2.185665512084961
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,4,2,128,1,float16,fp8,0,2.0440479278564454
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,4,2,128,1,fp8,fp8,0,2.04824161529541
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,4,4,128,1,float16,fp8,0,1.2391263961791992
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,4,4,128,1,fp8,fp8,0,1.1573488235473632
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,4,1,128,1,float16,float16,0,1.0283696174621582
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,4,1,128,1,float16,fp8,0,0.992347240447998
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,4,1,128,1,fp8,fp8,0,1.0170623779296875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,4,4,128,1,float16,float16,0,0.6624512195587158
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,4,2,128,1,float16,float16,0,1.103775978088379
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,4,4,128,1,float16,fp8,0,0.6055439949035645
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,4,2,128,1,float16,fp8,0,1.038535976409912
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,4,1,128,1,fp8,fp8,0,0.503715181350708
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,4,2,128,1,fp8,fp8,0,1.0366479873657226
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,4,4,128,1,fp8,fp8,0,0.6177567958831787
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,4,1,128,1,float16,float16,0,0.5280367851257324
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,4,1,128,1,float16,fp8,0,0.5079391956329345
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,4,2,128,1,float16,float16,0,0.6002816200256348
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,4,2,128,1,float16,fp8,0,0.5331215858459473
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,4,2,128,1,fp8,fp8,0,0.536252784729004
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,4,4,128,1,float16,float16,0,0.3622591972351074
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,4,4,128,1,float16,fp8,0,0.3213279962539673
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,4,4,128,1,fp8,fp8,0,0.32223360538482665
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,4,1,128,1,float16,float16,0,0.2869215965270996
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,4,1,128,1,float16,fp8,0,0.27388958930969237
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,4,1,128,1,fp8,fp8,0,0.2783616065979004
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,4,2,128,1,float16,float16,0,0.30678720474243165
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,4,2,128,1,float16,fp8,0,0.2839087963104248
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,4,2,128,1,fp8,fp8,0,0.2856031894683838
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,4,1,128,1,float16,float16,0,1.486070442199707
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,4,1,128,1,float16,fp8,0,1.3708127975463866
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,4,1,128,1,fp8,fp8,0,1.3701312065124511
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,4,2,128,1,float16,float16,0,1.5633184432983398
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,4,2,128,1,float16,fp8,0,1.4668911933898925
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,4,4,128,1,float16,float16,0,0.914401626586914
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,4,1,128,1,float16,float16,0,0.7403823852539062
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,4,4,128,1,float16,fp8,0,0.8561216354370117
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,4,2,128,1,fp8,fp8,0,1.458743953704834
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,4,4,128,1,fp8,fp8,0,0.8979439735412598
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,4,1,128,1,float16,fp8,0,0.704857587814331
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,4,2,128,1,float16,fp8,0,0.7440800189971923
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,4,1,128,1,fp8,fp8,0,0.7679152011871337
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,4,2,128,1,float16,float16,0,0.8039648056030273
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,4,4,128,1,float16,float16,0,0.4970111846923828
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,4,2,128,1,fp8,fp8,0,0.7510144233703613
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,4,4,128,1,float16,fp8,0,0.4501999855041504
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,4,4,128,1,fp8,fp8,0,0.4530831813812256
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,4,1,128,1,float16,float16,0,0.38656799793243407
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,4,1,128,1,float16,fp8,0,0.3627791881561279
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,4,1,128,1,fp8,fp8,0,0.37311201095581054
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,4,2,128,1,float16,float16,0,0.42409439086914064
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,4,2,128,1,float16,fp8,0,0.3931600093841553
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,4,2,128,1,fp8,fp8,0,0.3912832021713257
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,4,4,128,1,float16,float16,0,0.28889760971069334
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,4,4,128,1,float16,fp8,0,0.23603520393371583
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,4,4,128,1,fp8,fp8,0,0.23653600215911866
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,4,1,128,1,float16,float16,0,0.20836958885192872
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,4,1,128,1,float16,fp8,0,0.20028159618377686
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,4,1,128,1,fp8,fp8,0,0.20086400508880614
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,4,2,128,1,float16,float16,0,0.2255728006362915
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,4,2,128,1,float16,fp8,0,0.2099168062210083
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,4,2,128,1,fp8,fp8,0,0.20863039493560792
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,4,1,128,1,float16,float16,0,1.9593759536743165
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,4,1,128,1,float16,fp8,0,1.7799903869628906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,4,1,128,1,fp8,fp8,0,1.7815536499023437
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,4,4,128,1,float16,float16,0,1.1956624031066894
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,4,4,128,1,float16,fp8,0,1.133403205871582
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,4,2,128,1,float16,float16,0,2.073606491088867
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,4,2,128,1,float16,fp8,0,1.9520767211914063
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,4,1,128,1,float16,float16,0,0.9702848434448242
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,4,2,128,1,fp8,fp8,0,1.9256511688232423
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,4,4,128,1,fp8,fp8,0,1.175699234008789
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,4,1,128,1,float16,fp8,0,0.9090111732482911
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,4,1,128,1,fp8,fp8,0,0.9110336303710938
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,4,2,128,1,float16,float16,0,1.0343631744384765
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,4,2,128,1,float16,fp8,0,1.0015199661254883
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,4,4,128,1,float16,float16,0,0.6101168155670166
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,4,2,128,1,fp8,fp8,0,0.9899567604064942
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,4,4,128,1,float16,fp8,0,0.5865983963012695
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,4,4,128,1,fp8,fp8,0,0.580244779586792
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,4,1,128,1,float16,float16,0,0.49039201736450194
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,4,1,128,1,float16,fp8,0,0.4675471782684326
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,4,1,128,1,fp8,fp8,0,0.4792784214019775
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,4,2,128,1,float16,float16,0,0.5328271865844727
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,4,2,128,1,float16,fp8,0,0.5056704044342041
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,4,2,128,1,fp8,fp8,0,0.506667184829712
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,4,4,128,1,float16,float16,0,0.3124880075454712
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,4,4,128,1,float16,fp8,0,0.30136001110076904
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,4,4,128,1,fp8,fp8,0,0.3020512104034424
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,4,1,128,1,float16,float16,0,0.2574160099029541
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,4,1,128,1,float16,fp8,0,0.23873600959777833
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,4,1,128,1,fp8,fp8,0,0.23930079936981202
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,4,2,128,1,float16,float16,0,0.27727839946746824
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,4,2,128,1,float16,fp8,0,0.2600895881652832
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,4,2,128,1,fp8,fp8,0,0.2615391969680786
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,4,1,128,1,float16,fp8,0,0.1291872024536133
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,4,4,128,1,float16,float16,0,0.16594079732894898
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,4,4,128,1,float16,fp8,0,0.1556831955909729
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,4,4,128,1,fp8,fp8,0,0.15639519691467285
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,4,1,128,1,float16,float16,0,0.1328992009162903
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,4,1,128,1,fp8,fp8,0,0.12860000133514404
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,4,2,128,1,float16,float16,0,0.14272160530090333
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,4,2,128,1,fp8,fp8,0,0.13686399459838866
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,4,2,128,1,float16,fp8,0,0.13680319786071776
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,4,1,128,1,float16,float16,0,1.1263855934143066
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,4,1,128,1,float16,fp8,0,1.0815279960632325
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,4,1,128,1,fp8,fp8,0,1.077284812927246
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,4,2,128,1,float16,fp8,0,1.178494358062744
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,4,2,128,1,float16,float16,0,1.2407695770263671
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,4,4,128,1,float16,float16,0,0.7751887798309326
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,4,2,128,1,fp8,fp8,0,1.182419204711914
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,4,4,128,1,float16,fp8,0,0.7186816215515137
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,4,4,128,1,fp8,fp8,0,0.7188784122467041
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,4,1,128,1,float16,float16,0,0.579369592666626
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,4,1,128,1,float16,fp8,0,0.5557231903076172
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,4,1,128,1,fp8,fp8,0,0.5562719821929931
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,4,2,128,1,float16,float16,0,0.6398159980773925
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,4,4,128,1,fp8,fp8,0,0.37757120132446287
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,4,2,128,1,float16,fp8,0,0.6119743824005127
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,4,2,128,1,fp8,fp8,0,0.6090303897857666
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,4,4,128,1,float16,float16,0,0.4058080196380615
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,4,4,128,1,float16,fp8,0,0.37564959526062014
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,4,1,128,1,float16,float16,0,0.3040976047515869
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,4,1,128,1,float16,fp8,0,0.2965199947357178
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,4,1,128,1,fp8,fp8,0,0.2967103958129883
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,4,2,128,1,float16,float16,0,0.3341615915298462
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,4,2,128,1,float16,fp8,0,0.3197936058044434
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,4,2,128,1,fp8,fp8,0,0.3216736078262329
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,4,4,128,1,float16,float16,0,0.22412960529327391
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,4,4,128,1,float16,fp8,0,0.20083999633789062
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,4,4,128,1,fp8,fp8,0,0.1995967984199524
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,4,1,128,1,float16,float16,0,0.1664736032485962
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,4,1,128,1,float16,fp8,0,0.1587231993675232
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,4,1,128,1,fp8,fp8,0,0.15835360288619996
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,4,2,128,1,float16,float16,0,0.18397599458694458
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,4,2,128,1,float16,fp8,0,0.16954400539398193
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,4,2,128,1,fp8,fp8,0,0.16968319416046143
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,4,4,128,1,float16,float16,0,0.12362079620361328
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,4,4,128,1,float16,fp8,0,0.11706720590591431
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,4,4,128,1,fp8,fp8,0,0.11772799491882324
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,4,1,128,1,float16,float16,0,0.10295039415359497
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,4,1,128,1,float16,fp8,0,0.09991679787635803
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,4,1,128,1,fp8,fp8,0,0.09887520074844361
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,4,2,128,1,float16,float16,0,0.10941280126571655
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,4,2,128,1,float16,fp8,0,0.1060960054397583
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,4,2,128,1,fp8,fp8,0,0.1064031958580017
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,4,1,128,1,float16,float16,0,1.094313621520996
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,4,1,128,1,float16,fp8,0,1.042739200592041
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,4,1,128,1,fp8,fp8,0,1.05513277053833
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,4,2,128,1,float16,float16,0,1.2347536087036133
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,4,2,128,1,float16,fp8,0,1.1882800102233886
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,4,4,128,1,float16,float16,0,0.7865856170654297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,4,2,128,1,fp8,fp8,0,1.1919695854187011
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,4,4,128,1,float16,fp8,0,0.7574543952941895
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,4,4,128,1,fp8,fp8,0,0.7522272109985352
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,4,1,128,1,float16,float16,0,0.5507840156555176
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,4,1,128,1,float16,fp8,0,0.5434959888458252
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,4,1,128,1,fp8,fp8,0,0.5371888160705567
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,4,2,128,1,float16,float16,0,0.6361775875091553
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,4,2,128,1,float16,fp8,0,0.6063759803771973
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,4,4,128,1,float16,float16,0,0.40462398529052734
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,4,2,128,1,fp8,fp8,0,0.60862717628479
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,4,4,128,1,float16,fp8,0,0.3880223989486694
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,4,4,128,1,fp8,fp8,0,0.38980319499969485
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,4,1,128,1,float16,float16,0,0.29372320175170896
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,4,1,128,1,float16,fp8,0,0.2825648069381714
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,4,1,128,1,fp8,fp8,0,0.2812720060348511
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,4,2,128,1,float16,float16,0,0.32863359451293944
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,4,2,128,1,float16,fp8,0,0.3173135995864868
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,4,2,128,1,fp8,fp8,0,0.31648960113525393
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,4,4,128,1,float16,float16,0,0.2130687952041626
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,4,2,128,1,float16,float16,0,0.1763200044631958
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,4,2,128,1,float16,fp8,0,0.16652319431304932
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,4,4,128,1,float16,fp8,0,0.20616159439086915
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,4,4,128,1,fp8,fp8,0,0.2064863920211792
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,4,1,128,1,float16,float16,0,0.15579839944839477
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,4,1,128,1,float16,fp8,0,0.14612640142440797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,4,1,128,1,fp8,fp8,0,0.14589600563049315
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,4,2,128,1,fp8,fp8,0,0.1667695999145508
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,4,4,128,1,float16,float16,0,0.11521439552307129
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,4,4,128,1,float16,fp8,0,0.10521919727325439
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,4,4,128,1,fp8,fp8,0,0.10571999549865722
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,4,1,128,1,float16,float16,0,0.08242400288581848
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,4,1,128,1,float16,fp8,0,0.08062719702720642
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,4,1,128,1,fp8,fp8,0,0.08081600069999695
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,4,2,128,1,float16,float16,0,0.0910095989704132
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,4,2,128,1,float16,fp8,0,0.0885919988155365
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,4,2,128,1,fp8,fp8,0,0.08843680024147034
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,4,4,128,1,float16,float16,0,0.08469759821891784
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,4,1,128,1,fp8,fp8,0,0.07211999893188477
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,4,4,128,1,float16,fp8,0,0.08387680053710937
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,4,4,128,1,fp8,fp8,0,0.08354079723358154
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,4,1,128,1,float16,float16,0,0.0726095974445343
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,4,1,128,1,float16,fp8,0,0.07221119999885559
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,4,2,128,1,float16,float16,0,0.07688959836959838
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,4,2,128,1,float16,fp8,0,0.07688000202178955
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,4,2,128,1,fp8,fp8,0,0.07593119740486146
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,4,1,128,1,float16,float16,0,0.7186736106872559
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,4,1,128,1,float16,fp8,0,0.6936304092407226
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,4,1,128,1,fp8,fp8,0,0.696724796295166
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,4,2,128,1,float16,float16,0,0.8360383987426758
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,4,2,128,1,float16,fp8,0,0.8106719970703125
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,4,4,128,1,float16,float16,0,0.5488175868988037
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,4,2,128,1,fp8,fp8,0,0.8102767944335938
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,4,4,128,1,float16,fp8,0,0.5310848236083985
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,4,4,128,1,fp8,fp8,0,0.532041597366333
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,4,1,128,1,float16,float16,0,0.37935519218444824
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,4,1,128,1,float16,fp8,0,0.36801600456237793
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,4,1,128,1,fp8,fp8,0,0.3676624059677124
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,4,2,128,1,float16,float16,0,0.43540802001953127
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,4,2,128,1,float16,fp8,0,0.42276320457458494
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,4,4,128,1,float16,float16,0,0.29078400135040283
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,4,1,128,1,fp8,fp8,0,0.20002241134643556
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,4,2,128,1,fp8,fp8,0,0.42206082344055174
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,4,4,128,1,float16,fp8,0,0.2824064016342163
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,4,4,128,1,fp8,fp8,0,0.2809887886047363
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,4,1,128,1,float16,float16,0,0.2074352025985718
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,4,1,128,1,float16,fp8,0,0.20144639015197754
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,4,2,128,1,float16,float16,0,0.23428480625152587
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,4,2,128,1,float16,fp8,0,0.22798080444335939
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,4,2,128,1,fp8,fp8,0,0.2277359962463379
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,4,4,128,1,float16,float16,0,0.16003680229187012
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,4,4,128,1,float16,fp8,0,0.153329598903656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,4,4,128,1,fp8,fp8,0,0.15358400344848633
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,4,1,128,1,float16,float16,0,0.11425119638442993
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,4,1,128,1,float16,fp8,0,0.11062079668045044
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,4,1,128,1,fp8,fp8,0,0.11001440286636352
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,4,2,128,1,float16,float16,0,0.131222403049469
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,4,2,128,1,float16,fp8,0,0.12182559967041015
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,4,2,128,1,fp8,fp8,0,0.12222880125045776
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,4,4,128,1,float16,float16,0,0.08915519714355469
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,4,4,128,1,float16,fp8,0,0.08562880158424377
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,4,4,128,1,fp8,fp8,0,0.08604480028152466
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,4,1,128,1,float16,float16,0,0.06677759885787964
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,4,1,128,1,float16,fp8,0,0.06805279850959778
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,4,1,128,1,fp8,fp8,0,0.06754879951477051
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,4,2,128,1,float16,float16,0,0.07421600222587585
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,4,2,128,1,float16,fp8,0,0.07369279861450195
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,4,2,128,1,fp8,fp8,0,0.07395359873771667
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,4,4,128,1,float16,float16,0,0.07115839719772339
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,4,4,128,1,float16,fp8,0,0.07080159783363342
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,4,4,128,1,fp8,fp8,0,0.07078559994697571
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,4,1,128,1,float16,float16,0,0.052430397272109984
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,4,1,128,1,float16,fp8,0,0.05288000106811523
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,4,1,128,1,fp8,fp8,0,0.05252320170402527
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,4,2,128,1,float16,float16,0,0.061022400856018066
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,4,2,128,1,float16,fp8,0,0.06182240247726441
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,4,2,128,1,fp8,fp8,0,0.06166560053825378
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,4,1,128,1,float16,float16,0,0.7489664077758789
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,4,1,128,1,float16,fp8,0,0.7295231819152832
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,4,1,128,1,fp8,fp8,0,0.7259488105773926
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,4,2,128,1,float16,float16,0,0.8998047828674316
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,4,2,128,1,float16,fp8,0,0.8742416381835938
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,4,4,128,1,float16,float16,0,0.6215151786804199
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,4,2,128,1,fp8,fp8,0,0.8754575729370118
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,4,4,128,1,float16,fp8,0,0.6059792041778564
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,4,1,128,1,float16,float16,0,0.3934607982635498
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,4,4,128,1,fp8,fp8,0,0.6039648056030273
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,4,1,128,1,float16,fp8,0,0.3811759948730469
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,4,1,128,1,fp8,fp8,0,0.3807152032852173
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,4,2,128,1,float16,float16,0,0.4693103790283203
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,4,2,128,1,float16,fp8,0,0.45551838874816897
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,4,2,128,1,fp8,fp8,0,0.45475358963012696
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,4,4,128,1,float16,float16,0,0.3290447950363159
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,4,4,128,1,float16,fp8,0,0.3184031963348389
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,4,4,128,1,fp8,fp8,0,0.3181504011154175
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,4,1,128,1,float16,float16,0,0.21603999137878419
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,4,1,128,1,float16,fp8,0,0.20936799049377441
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,4,1,128,1,fp8,fp8,0,0.209334397315979
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,4,2,128,1,float16,float16,0,0.2533519983291626
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,4,2,128,1,float16,fp8,0,0.24491679668426514
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,4,2,128,1,fp8,fp8,0,0.24403040409088134
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,4,4,128,1,float16,float16,0,0.18007680177688598
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,4,4,128,1,float16,fp8,0,0.17732640504837036
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,4,2,128,1,float16,float16,0,0.14233280420303346
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,4,4,128,1,fp8,fp8,0,0.17716000080108643
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,4,1,128,1,float16,float16,0,0.1234063982963562
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,4,1,128,1,float16,fp8,0,0.11567679643630982
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,4,1,128,1,fp8,fp8,0,0.11750400066375732
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,4,2,128,1,float16,fp8,0,0.13862080574035646
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,4,2,128,1,fp8,fp8,0,0.13766720294952392
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,4,4,128,1,float16,float16,0,0.10537120103836059
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,4,4,128,1,float16,fp8,0,0.09710239768028259
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,4,4,128,1,fp8,fp8,0,0.09680320024490356
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,4,1,128,1,float16,float16,0,0.07268959879875184
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,4,1,128,1,float16,fp8,0,0.07265120148658752
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,4,1,128,1,fp8,fp8,0,0.0728272020816803
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,4,2,128,1,float16,float16,0,0.08300639986991883
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,4,2,128,1,float16,fp8,0,0.07999200224876404
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,4,2,128,1,fp8,fp8,0,0.07988960146903992
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,4,4,128,1,float16,float16,0,0.06266559958457947
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,4,4,128,1,float16,fp8,0,0.06293119788169861
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,4,4,128,1,fp8,fp8,0,0.06305119991302491
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,4,1,128,1,float16,float16,0,0.050704002380371094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,4,1,128,1,float16,fp8,0,0.0514240026473999
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,4,1,128,1,fp8,fp8,0,0.051286399364471436
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,4,2,128,1,float16,float16,0,0.055371201038360594
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,4,2,128,1,float16,fp8,0,0.05562400221824646
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,4,2,128,1,fp8,fp8,0,0.0556768000125885
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,4,4,128,1,float16,float16,0,0.04791040122509003
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,4,4,128,1,fp8,fp8,0,0.04832639992237091
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,4,4,128,1,float16,fp8,0,0.04912160038948059
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,4,1,128,1,float16,float16,0,0.04193440079689026
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,4,1,128,1,float16,fp8,0,0.043137601017951964
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,4,1,128,1,fp8,fp8,0,0.043663999438285826
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,4,2,128,1,float16,float16,0,0.04280000030994415
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,4,2,128,1,float16,fp8,0,0.043971198797225955
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,4,2,128,1,fp8,fp8,0,0.04399999976158142
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,4,1,128,1,float16,float16,0,0.4944015979766846
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,4,1,128,1,float16,fp8,0,0.48604798316955566
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,4,1,128,1,fp8,fp8,0,0.4840735912322998
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,4,2,128,1,float16,float16,0,0.6106095790863038
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,4,2,128,1,float16,fp8,0,0.5957183837890625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,4,2,128,1,fp8,fp8,0,0.5952767848968505
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,4,4,128,1,float16,float16,0,0.4344927787780762
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,4,4,128,1,float16,fp8,0,0.42552800178527833
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,4,4,128,1,fp8,fp8,0,0.4254447937011719
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,4,1,128,1,float16,float16,0,0.2658047914505005
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,4,1,128,1,float16,fp8,0,0.25951519012451174
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,4,1,128,1,fp8,fp8,0,0.259883189201355
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,4,2,128,1,float16,float16,0,0.32207679748535156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,4,2,128,1,float16,fp8,0,0.3149712085723877
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,4,2,128,1,fp8,fp8,0,0.3143071889877319
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,4,4,128,1,float16,float16,0,0.23491039276123046
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,4,4,128,1,float16,fp8,0,0.22822721004486085
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,4,4,128,1,fp8,fp8,0,0.22862720489501953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,4,1,128,1,float16,float16,0,0.14899519681930543
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,4,1,128,1,float16,fp8,0,0.14614720344543458
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,4,4,128,1,float16,fp8,0,0.12949440479278565
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,4,1,128,1,fp8,fp8,0,0.1460479974746704
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,4,2,128,1,float16,float16,0,0.17698240280151367
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,4,2,128,1,float16,fp8,0,0.17368320226669312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,4,2,128,1,fp8,fp8,0,0.1740031957626343
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,4,4,128,1,float16,float16,0,0.13131040334701538
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,4,4,128,1,fp8,fp8,0,0.1292240023612976
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,4,1,128,1,float16,float16,0,0.08602399826049804
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,4,1,128,1,float16,fp8,0,0.08302559852600097
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,4,1,128,1,fp8,fp8,0,0.08309119939804077
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,4,2,128,1,float16,float16,0,0.10267360210418701
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,4,2,128,1,float16,fp8,0,0.09540159702301025
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,4,2,128,1,fp8,fp8,0,0.09615200161933898
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,4,4,128,1,float16,float16,0,0.07606239914894104
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,4,4,128,1,float16,fp8,0,0.07246879935264587
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,4,4,128,1,fp8,fp8,0,0.07274240255355835
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,4,1,128,1,float16,float16,0,0.05382400155067444
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,4,1,128,1,float16,fp8,0,0.05470079779624939
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,4,1,128,1,fp8,fp8,0,0.05491679906845093
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,4,2,128,1,float16,float16,0,0.06060479879379273
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,4,2,128,1,float16,fp8,0,0.06128159761428833
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,4,2,128,1,fp8,fp8,0,0.060817599296569824
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,4,4,128,1,float16,float16,0,0.05869920253753662
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,4,4,128,1,float16,fp8,0,0.05948160290718078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,4,2,128,1,float16,fp8,0,0.05318719744682312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,4,4,128,1,fp8,fp8,0,0.05934399962425232
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,4,1,128,1,float16,float16,0,0.04750399887561798
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,4,1,128,1,float16,fp8,0,0.04897119998931885
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,4,1,128,1,fp8,fp8,0,0.048537600040435794
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,4,2,128,1,float16,float16,0,0.05143839716911316
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,4,2,128,1,fp8,fp8,0,0.0530128002166748
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,4,4,128,1,float16,float16,0,0.03936479985713959
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,4,4,128,1,float16,fp8,0,0.039657598733901976
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,4,4,128,1,fp8,fp8,0,0.03982079923152924
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,4,1,128,1,float16,float16,0,0.035017600655555724
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,4,1,128,1,float16,fp8,0,0.036724799871444704
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,4,1,128,1,fp8,fp8,0,0.03657279908657074
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,4,2,128,1,float16,float16,0,0.035883200168609616
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,4,2,128,1,float16,fp8,0,0.03749920129776001
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,4,2,128,1,fp8,fp8,0,0.03735679984092712
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,4,1,128,1,float16,float16,0,0.5486368179321289
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,4,1,128,1,float16,fp8,0,0.5433040142059327
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,4,2,128,1,float16,fp8,0,0.6905727863311768
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,4,1,128,1,fp8,fp8,0,0.5435999870300293
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,4,2,128,1,float16,float16,0,0.6992368221282959
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,4,1,128,1,float16,float16,0,0.29286720752716067
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,4,2,128,1,fp8,fp8,0,0.6914639949798584
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,4,4,128,1,float16,float16,0,0.5173327922821045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,4,4,128,1,float16,fp8,0,0.5107471942901611
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,4,4,128,1,fp8,fp8,0,0.5110767841339111
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,4,1,128,1,float16,fp8,0,0.28881919384002686
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,4,1,128,1,fp8,fp8,0,0.287441611289978
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,4,2,128,1,float16,float16,0,0.3681488037109375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,4,2,128,1,float16,fp8,0,0.36034719944000243
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,4,2,128,1,fp8,fp8,0,0.36096160411834716
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,4,4,128,1,float16,float16,0,0.2738912105560303
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,4,4,128,1,float16,fp8,0,0.2700416088104248
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,4,4,128,1,fp8,fp8,0,0.27026560306549074
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,4,1,128,1,float16,float16,0,0.16024320125579833
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,4,1,128,1,float16,fp8,0,0.16104320287704468
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,4,1,128,1,fp8,fp8,0,0.15997120141983032
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,4,2,128,1,float16,float16,0,0.19782719612121583
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,4,4,128,1,fp8,fp8,0,0.1509071946144104
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,4,2,128,1,float16,fp8,0,0.19708319902420043
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,4,2,128,1,fp8,fp8,0,0.19648319482803345
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,4,4,128,1,float16,float16,0,0.1501296043395996
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,4,4,128,1,float16,fp8,0,0.15083839893341064
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,4,1,128,1,float16,float16,0,0.09417759776115417
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,4,1,128,1,float16,fp8,0,0.08965280055999755
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,4,1,128,1,fp8,fp8,0,0.0891439974308014
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,4,2,128,1,float16,float16,0,0.1133679986000061
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,4,2,128,1,float16,fp8,0,0.11029599905014038
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,4,2,128,1,fp8,fp8,0,0.11062239408493042
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,4,4,128,1,float16,float16,0,0.0900592029094696
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,4,4,128,1,float16,fp8,0,0.08089600205421447
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,4,4,128,1,fp8,fp8,0,0.08157439827919007
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,4,1,128,1,float16,float16,0,0.05713599920272827
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,4,1,128,1,float16,fp8,0,0.05743359923362732
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,4,1,128,1,fp8,fp8,0,0.05699359774589539
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,4,2,128,1,float16,fp8,0,0.06443359851837158
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,4,2,128,1,float16,float16,0,0.06649760007858277
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,4,2,128,1,fp8,fp8,0,0.06461920142173767
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,4,4,128,1,float16,float16,0,0.05076320171356201
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,4,4,128,1,float16,fp8,0,0.05113599896430969
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,4,4,128,1,fp8,fp8,0,0.05177599787712097
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,4,1,128,1,float16,float16,0,0.038068801164627075
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,4,1,128,1,float16,fp8,0,0.03935360014438629
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,4,1,128,1,fp8,fp8,0,0.03924480080604553
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,4,2,128,1,float16,float16,0,0.0426256000995636
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,4,2,128,1,float16,fp8,0,0.04370239973068237
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,4,2,128,1,fp8,fp8,0,0.04392319917678833
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,4,4,128,1,float16,float16,0,0.04090720117092132
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,4,4,128,1,float16,fp8,0,0.04331839978694916
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,4,4,128,1,fp8,fp8,0,0.04304159879684448
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,4,1,128,1,float16,float16,0,0.035755199193954465
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,4,1,128,1,float16,fp8,0,0.03761920034885406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,4,1,128,1,fp8,fp8,0,0.0376336008310318
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,4,2,128,1,float16,float16,0,0.03663359880447388
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,4,2,128,1,float16,fp8,0,0.038652798533439635
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,4,2,128,1,fp8,fp8,0,0.03855679929256439
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,4,4,128,1,float16,float16,0,0.03250080049037933
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,4,4,128,1,float16,fp8,0,0.032662400603294374
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,4,4,128,1,fp8,fp8,0,0.03250400125980377
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,4,1,128,1,float16,float16,0,0.030985599756240843
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,4,1,128,1,float16,fp8,0,0.031228798627853393
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,4,1,128,1,fp8,fp8,0,0.031112000346183777
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,4,2,128,1,float16,float16,0,0.03165920078754425
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,4,1,128,1,fp8,fp8,0,0.3992271900177002
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,4,2,128,1,float16,fp8,0,0.03155519962310791
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,4,2,128,1,fp8,fp8,0,0.031497600674629214
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,4,2,128,1,float16,fp8,0,0.5505551815032959
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,4,1,128,1,float16,float16,0,0.40009279251098634
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,4,1,128,1,float16,fp8,0,0.3998192071914673
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,4,2,128,1,float16,float16,0,0.550055980682373
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,4,2,128,1,fp8,fp8,0,0.5490416049957275
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,4,4,128,1,float16,float16,0,0.43815197944641116
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,4,4,128,1,float16,fp8,0,0.4384335994720459
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,4,1,128,1,float16,float16,0,0.2122096061706543
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,4,4,128,1,fp8,fp8,0,0.43836321830749514
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,4,1,128,1,float16,fp8,0,0.212990403175354
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,4,1,128,1,fp8,fp8,0,0.2139440059661865
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,4,2,128,1,float16,float16,0,0.28796160221099854
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,4,2,128,1,float16,fp8,0,0.2865855932235718
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,4,2,128,1,fp8,fp8,0,0.287390398979187
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,4,4,128,1,float16,float16,0,0.2315056085586548
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,4,4,128,1,float16,fp8,0,0.23081281185150146
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,4,4,128,1,fp8,fp8,0,0.23173279762268068
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,4,1,128,1,float16,float16,0,0.11972479820251465
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,4,4,128,1,float16,float16,0,0.1285823941230774
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,4,1,128,1,float16,fp8,0,0.12105920314788818
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,4,1,128,1,fp8,fp8,0,0.12069120407104492
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,4,2,128,1,float16,float16,0,0.1563680052757263
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,4,2,128,1,float16,fp8,0,0.15697920322418213
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,4,2,128,1,fp8,fp8,0,0.15692800283432007
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,4,4,128,1,float16,fp8,0,0.12889440059661866
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,4,4,128,1,fp8,fp8,0,0.1285904049873352
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,4,1,128,1,float16,float16,0,0.07170559763908387
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,4,1,128,1,float16,fp8,0,0.06650720238685608
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,4,1,128,1,fp8,fp8,0,0.0681007981300354
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,4,2,128,1,float16,float16,0,0.09149919748306275
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,4,2,128,1,float16,fp8,0,0.08900480270385742
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,4,2,128,1,fp8,fp8,0,0.08755519986152649
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,4,4,128,1,float16,float16,0,0.07587839961051941
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,4,4,128,1,float16,fp8,0,0.06938880085945129
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,4,4,128,1,fp8,fp8,0,0.06940479874610901
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,4,1,128,1,float16,float16,0,0.04225119948387146
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,4,1,128,1,float16,fp8,0,0.043515199422836305
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,4,1,128,1,fp8,fp8,0,0.04353759884834289
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,4,2,128,1,float16,float16,0,0.05165119767189026
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,4,2,128,1,float16,fp8,0,0.05122079849243164
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,4,2,128,1,fp8,fp8,0,0.05118240118026733
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,4,4,128,1,float16,float16,0,0.04116480052471161
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,4,4,128,1,float16,fp8,0,0.042396798729896545
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,4,4,128,1,fp8,fp8,0,0.042475199699401854
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,4,1,128,1,float16,float16,0,0.029096001386642457
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,4,1,128,1,float16,fp8,0,0.030825600028038025
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,4,1,128,1,fp8,fp8,0,0.030822399258613586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,4,2,128,1,float16,float16,0,0.03327359855175018
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,4,2,128,1,float16,fp8,0,0.03508960008621216
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,4,2,128,1,fp8,fp8,0,0.03479360044002533
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,4,4,128,1,float16,float16,0,0.03300800025463104
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,4,4,128,1,float16,fp8,0,0.03490239977836609
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,4,4,128,1,fp8,fp8,0,0.03488639891147614
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,4,1,128,1,float16,float16,0,0.027393600344657897
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,4,1,128,1,float16,fp8,0,0.02943359911441803
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,4,1,128,1,fp8,fp8,0,0.029054400324821473
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,4,2,128,1,float16,float16,0,0.028585600852966308
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,4,2,128,1,float16,fp8,0,0.03043839931488037
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,4,2,128,1,fp8,fp8,0,0.03041119873523712
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,4,4,128,1,float16,float16,0,0.028391999006271363
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,4,4,128,1,float16,fp8,0,0.03031040132045746
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,4,4,128,1,fp8,fp8,0,0.030214399099349976
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,4,1,128,1,float16,float16,0,0.027030399441719054
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,4,1,128,1,float16,fp8,0,0.028815999627113342
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,4,1,128,1,fp8,fp8,0,0.02892000079154968
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,4,2,128,1,float16,float16,0,0.027438399195671082
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,4,2,128,1,float16,fp8,0,0.029185599088668822
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,4,2,128,1,fp8,fp8,0,0.02918879985809326
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,4,4,128,1,float16,float16,0,0.026734399795532226
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,4,4,128,1,float16,fp8,0,0.028916800022125246
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,4,4,128,1,fp8,fp8,0,0.02871040105819702
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,4,1,128,1,float16,float16,0,0.026572799682617186
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,4,1,128,1,float16,fp8,0,0.028321599960327147
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,4,1,128,1,fp8,fp8,0,0.028324800729751586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,4,2,128,1,float16,float16,0,0.026556798815727235
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,4,2,128,1,float16,fp8,0,0.028484800457954408
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,4,2,128,1,fp8,fp8,0,0.028707200288772584
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,4,1,128,1,float16,float16,0,0.18924319744110107
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,4,1,128,1,float16,fp8,0,0.19188159704208374
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,4,1,128,1,fp8,fp8,0,0.19160959720611573
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,4,2,128,1,float16,float16,0,0.2652559995651245
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,4,2,128,1,float16,fp8,0,0.2655103921890259
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,4,2,128,1,fp8,fp8,0,0.2655488014221191
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,4,4,128,1,float16,float16,0,0.22103519439697267
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,4,4,128,1,float16,fp8,0,0.21599841117858887
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,4,4,128,1,fp8,fp8,0,0.2160383939743042
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,4,1,128,1,float16,float16,0,0.10750399827957154
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,4,1,128,1,float16,fp8,0,0.10872479677200317
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,4,1,128,1,fp8,fp8,0,0.10914399623870849
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,4,2,128,1,float16,float16,0,0.14473600387573243
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,4,2,128,1,float16,fp8,0,0.1451040029525757
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,4,2,128,1,fp8,fp8,0,0.1453536033630371
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,4,4,128,1,float16,float16,0,0.12164160013198852
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,4,4,128,1,float16,fp8,0,0.11918879747390747
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,4,4,128,1,fp8,fp8,0,0.11907520294189453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,4,1,128,1,float16,float16,0,0.06504799723625183
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,4,1,128,1,float16,fp8,0,0.06230400204658508
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,4,1,128,1,fp8,fp8,0,0.06142079830169678
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,4,2,128,1,float16,float16,0,0.08411039710044861
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,4,2,128,1,float16,fp8,0,0.08089759945869446
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,4,2,128,1,fp8,fp8,0,0.08059999942779542
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,4,4,128,1,float16,float16,0,0.0697983980178833
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,4,4,128,1,float16,fp8,0,0.061887997388839724
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,4,4,128,1,fp8,fp8,0,0.06223679780960083
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,4,1,128,1,float16,float16,0,0.03705919981002807
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,4,1,128,1,float16,fp8,0,0.03867999911308288
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,4,1,128,1,fp8,fp8,0,0.03873920142650604
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,4,2,128,1,float16,float16,0,0.046115198731422426
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,4,2,128,1,float16,fp8,0,0.046249601244926455
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,4,2,128,1,fp8,fp8,0,0.046265599131584165
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,4,4,128,1,float16,float16,0,0.03794400095939636
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,4,4,128,1,float16,fp8,0,0.03878880143165588
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,4,4,128,1,fp8,fp8,0,0.038806399703025816
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,4,1,128,1,float16,float16,0,0.025916799902915955
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,4,1,128,1,float16,fp8,0,0.02691200077533722
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,4,4,128,1,float16,fp8,0,0.031281599402427675
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,4,1,128,1,fp8,fp8,0,0.026756799221038817
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,4,2,128,1,float16,float16,0,0.030353599786758424
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,4,2,128,1,float16,fp8,0,0.031020799279212953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,4,2,128,1,fp8,fp8,0,0.03127520084381104
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,4,4,128,1,float16,float16,0,0.030054399371147157
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,4,4,128,1,fp8,fp8,0,0.030955201387405394
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,4,1,128,1,float16,float16,0,0.024619199335575104
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,4,1,128,1,float16,fp8,0,0.02558239996433258
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,4,1,128,1,fp8,fp8,0,0.025433599948883057
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,4,2,128,1,float16,float16,0,0.025747200846672057
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,4,2,128,1,float16,fp8,0,0.026662400364875792
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,4,2,128,1,fp8,fp8,0,0.02635200023651123
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,4,4,128,1,float16,float16,0,0.025516799092292784
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,4,4,128,1,float16,fp8,0,0.026289600133895873
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,4,4,128,1,fp8,fp8,0,0.02704319953918457
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,4,1,128,1,float16,float16,0,0.024055999517440797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,4,1,128,1,float16,fp8,0,0.025076800584793092
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,4,1,128,1,fp8,fp8,0,0.025364801287651062
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,4,2,128,1,float16,float16,0,0.024083200097084045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,4,2,128,1,float16,fp8,0,0.025547200441360475
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,4,2,128,1,fp8,fp8,0,0.025775998830795288
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,4,4,128,1,float16,float16,0,0.024195200204849242
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,4,4,128,1,float16,fp8,0,0.025241601467132568
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,4,4,128,1,fp8,fp8,0,0.02532159984111786
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,4,1,128,1,float16,float16,0,0.0235727995634079
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,4,1,128,1,float16,fp8,0,0.024376000463962554
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,4,1,128,1,fp8,fp8,0,0.024710400402545928
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,4,2,128,1,float16,float16,0,0.023835200071334838
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,4,2,128,1,float16,fp8,0,0.024820800125598907
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,4,2,128,1,fp8,fp8,0,0.024851199984550477
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,4,4,128,1,float16,float16,0,0.022116799652576447
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,4,4,128,1,float16,fp8,0,0.023552000522613525
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,4,4,128,1,fp8,fp8,0,0.02351039946079254
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,4,1,128,1,float16,float16,0,0.022071999311447144
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,4,1,128,1,float16,fp8,0,0.02351839989423752
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,4,1,128,1,fp8,fp8,0,0.023280000686645506
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,4,2,128,1,float16,float16,0,0.022430400550365447
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,4,2,128,1,float16,fp8,0,0.023632000386714935
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,4,2,128,1,fp8,fp8,0,0.023635199666023253
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,4,1,128,1,float16,float16,0,0.10463199615478516
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,4,1,128,1,float16,fp8,0,0.10595680475234985
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,4,1,128,1,fp8,fp8,0,0.10586080551147461
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,4,2,128,1,float16,float16,0,0.14255839586257935
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,4,2,128,1,float16,fp8,0,0.14145599603652953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,4,2,128,1,fp8,fp8,0,0.14165600538253784
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,4,4,128,1,float16,float16,0,0.12016799449920654
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,4,4,128,1,float16,fp8,0,0.1152608036994934
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,4,4,128,1,fp8,fp8,0,0.11545759439468384
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,4,1,128,1,float16,float16,0,0.062319999933242796
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,4,1,128,1,float16,fp8,0,0.057417601346969604
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,4,1,128,1,fp8,fp8,0,0.05676320195198059
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,4,2,128,1,float16,float16,0,0.08218399882316589
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,4,2,128,1,float16,fp8,0,0.07710719704627991
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,4,2,128,1,fp8,fp8,0,0.07667040228843688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,4,4,128,1,float16,float16,0,0.06817600131034851
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,4,4,128,1,float16,fp8,0,0.05910239815711975
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,4,4,128,1,fp8,fp8,0,0.059438401460647584
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,4,2,128,1,float16,fp8,0,0.04296799898147583
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,4,1,128,1,float16,float16,0,0.03495039939880371
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,4,1,128,1,float16,fp8,0,0.03573760092258453
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,4,1,128,1,fp8,fp8,0,0.035559999942779544
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,4,2,128,1,float16,float16,0,0.04327360093593598
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,4,2,128,1,fp8,fp8,0,0.043131199479103086
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,4,4,128,1,float16,float16,0,0.03749920129776001
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,4,4,128,1,float16,fp8,0,0.037206399440765384
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,4,4,128,1,fp8,fp8,0,0.037371200323104856
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,4,1,128,1,float16,float16,0,0.025231999158859254
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,4,1,128,1,float16,fp8,0,0.02569279968738556
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,4,1,128,1,fp8,fp8,0,0.0256415992975235
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,4,2,128,1,float16,float16,0,0.029016000032424927
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,4,2,128,1,float16,fp8,0,0.030055999755859375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,4,2,128,1,fp8,fp8,0,0.030103999376296996
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,4,4,128,1,float16,float16,0,0.028468799591064454
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,4,4,128,1,float16,fp8,0,0.029228800535202028
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,4,4,128,1,fp8,fp8,0,0.02924799919128418
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,4,1,128,1,float16,float16,0,0.023177599906921385
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,4,1,128,1,float16,fp8,0,0.02359039932489395
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,4,1,128,1,fp8,fp8,0,0.023764799535274505
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,4,2,128,1,float16,float16,0,0.02388480007648468
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,4,2,128,1,float16,fp8,0,0.02468799948692322
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,4,2,128,1,fp8,fp8,0,0.02455520033836365
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,4,4,128,1,float16,float16,0,0.02412160038948059
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,4,4,128,1,float16,fp8,0,0.02475520074367523
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,4,4,128,1,fp8,fp8,0,0.024475200474262236
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,4,1,128,1,float16,float16,0,0.02247679978609085
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,4,1,128,1,float16,fp8,0,0.02319840043783188
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,4,1,128,1,fp8,fp8,0,0.023281599581241607
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,4,2,128,1,float16,float16,0,0.022972799837589264
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,4,2,128,1,float16,fp8,0,0.0234592005610466
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,4,2,128,1,fp8,fp8,0,0.0237296000123024
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,4,4,128,1,float16,float16,0,0.022577600181102754
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,4,4,128,1,float16,fp8,0,0.023236800730228425
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,4,4,128,1,fp8,fp8,0,0.0229312002658844
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,4,1,128,1,float16,float16,0,0.021860800683498383
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,4,1,128,1,float16,fp8,0,0.02282080054283142
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,4,1,128,1,fp8,fp8,0,0.022708800435066224
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,4,2,128,1,float16,float16,0,0.02213120013475418
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,4,2,128,1,float16,fp8,0,0.022881600260734557
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,4,2,128,1,fp8,fp8,0,0.02272160053253174
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,4,4,128,1,float16,float16,0,0.020880000293254854
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,4,4,128,1,float16,fp8,0,0.021427200734615327
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,4,4,128,1,fp8,fp8,0,0.02130880057811737
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,4,1,128,1,float16,float16,0,0.020819200575351714
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,4,1,128,1,float16,fp8,0,0.021331200003623964
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,4,1,128,1,fp8,fp8,0,0.02128800004720688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,4,2,128,1,float16,float16,0,0.020921599864959717
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,4,2,128,1,float16,fp8,0,0.02165919989347458
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,4,2,128,1,fp8,fp8,0,0.02139520049095154
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,4,4,128,1,float16,float16,0,0.019916799664497376
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,4,4,128,1,float16,fp8,0,0.02078080028295517
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,4,4,128,1,fp8,fp8,0,0.02070239931344986
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,4,1,128,1,float16,float16,0,0.01974720060825348
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,4,1,128,1,float16,fp8,0,0.020729599893093108
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,4,1,128,1,fp8,fp8,0,0.020588800311088562
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,4,2,128,1,float16,float16,0,0.02003519982099533
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,4,2,128,1,float16,float16,0,0.0830128014087677
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,4,2,128,1,float16,fp8,0,0.020769600570201874
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,4,2,128,1,fp8,fp8,0,0.020584000647068022
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,4,1,128,1,float16,float16,0,0.06317600011825561
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,4,1,128,1,float16,fp8,0,0.05878080129623413
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,4,1,128,1,fp8,fp8,0,0.05756800174713135
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,4,2,128,1,float16,fp8,0,0.07818719744682312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,4,2,128,1,fp8,fp8,0,0.07802079916000366
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,4,4,128,1,float16,float16,0,0.07668160200119019
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,4,4,128,1,float16,fp8,0,0.06902239918708801
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,4,4,128,1,fp8,fp8,0,0.07042400240898132
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,4,1,128,1,float16,float16,0,0.03576320111751556
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,4,1,128,1,float16,fp8,0,0.036457601189613345
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,4,1,128,1,fp8,fp8,0,0.03675679862499237
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,4,2,128,1,float16,float16,0,0.04421280026435852
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,4,2,128,1,float16,fp8,0,0.043833601474761966
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,4,2,128,1,fp8,fp8,0,0.04390400052070618
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,4,4,128,1,float16,float16,0,0.040766400098800656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,4,4,128,1,float16,fp8,0,0.04209440052509308
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,4,4,128,1,fp8,fp8,0,0.04214560091495514
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,4,1,128,1,float16,float16,0,0.025676798820495606
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,4,1,128,1,float16,fp8,0,0.026412799954414368
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,4,1,128,1,fp8,fp8,0,0.02622239887714386
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,4,2,128,1,float16,float16,0,0.030177599191665648
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,4,2,128,1,float16,fp8,0,0.030350399017333985
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,4,2,128,1,fp8,fp8,0,0.0307343989610672
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,4,4,128,1,float16,float16,0,0.028468799591064454
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,4,4,128,1,float16,fp8,0,0.029468798637390138
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,4,4,128,1,fp8,fp8,0,0.02972640097141266
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,4,1,128,1,float16,float16,0,0.02319840043783188
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,4,1,128,1,float16,fp8,0,0.0241007998585701
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,4,1,128,1,fp8,fp8,0,0.02420320063829422
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,4,2,128,1,float16,float16,0,0.024376000463962554
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,4,2,128,1,float16,fp8,0,0.025196799635887147
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,4,2,128,1,fp8,fp8,0,0.02516320049762726
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,4,4,128,1,float16,float16,0,0.02370080053806305
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,4,4,128,1,float16,fp8,0,0.024451200664043427
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,4,4,128,1,fp8,fp8,0,0.02439360022544861
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,4,1,128,1,float16,float16,0,0.022785599529743194
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,4,1,128,1,float16,fp8,0,0.023046399652957916
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,4,1,128,1,fp8,fp8,0,0.023503999412059783
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,4,2,128,1,float16,float16,0,0.02316640019416809
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,4,2,128,1,float16,fp8,0,0.023897600173950196
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,4,2,128,1,fp8,fp8,0,0.023494400084018707
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,4,4,128,1,float16,float16,0,0.02260800004005432
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,4,4,128,1,float16,fp8,0,0.023281599581241607
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,4,4,128,1,fp8,fp8,0,0.023123200237751006
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,4,1,128,1,float16,float16,0,0.021911999583244322
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,4,1,128,1,float16,fp8,0,0.02290080040693283
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,4,1,128,1,fp8,fp8,0,0.022843199968338012
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,4,2,128,1,float16,float16,0,0.022307200729846953
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,4,2,128,1,float16,fp8,0,0.022864000499248506
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,4,2,128,1,fp8,fp8,0,0.022761599719524385
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,4,4,128,1,float16,float16,0,0.020908799767494202
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,4,4,128,1,float16,fp8,0,0.021374399960041045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,4,4,128,1,fp8,fp8,0,0.021214400231838227
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,4,1,128,1,float16,float16,0,0.020587199926376344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,4,1,128,1,float16,fp8,0,0.02138720005750656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,4,1,128,1,fp8,fp8,0,0.021508799493312837
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,4,2,128,1,float16,float16,0,0.020796799659729005
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,4,2,128,1,float16,fp8,0,0.021508799493312837
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,4,2,128,1,fp8,fp8,0,0.02136960029602051
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,4,4,128,1,float16,float16,0,0.019785599410533906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,4,4,128,1,float16,fp8,0,0.02050720006227493
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,4,4,128,1,fp8,fp8,0,0.020392000675201416
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,4,1,128,1,float16,float16,0,0.01972000002861023
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,4,1,128,1,float16,fp8,0,0.020446400344371795
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,4,1,128,1,fp8,fp8,0,0.020776000618934632
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,4,2,128,1,float16,float16,0,0.019908800721168518
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,4,2,128,1,float16,fp8,0,0.020750400424003602
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,4,2,128,1,fp8,fp8,0,0.020644800364971162
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,4,4,128,1,float16,float16,0,0.01930239945650101
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,4,4,128,1,float16,fp8,0,0.020292800664901734
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,4,4,128,1,fp8,fp8,0,0.019977599382400513
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,4,1,128,1,float16,float16,0,0.019462400674819948
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,4,1,128,1,float16,fp8,0,0.02035519927740097
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,4,1,128,1,fp8,fp8,0,0.020230400562286376
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,4,2,128,1,float16,float16,0,0.019662399590015412
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,4,2,128,1,float16,fp8,0,0.020347200334072113
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,4,2,128,1,fp8,fp8,0,0.02017119973897934
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,4,1,128,1,float16,float16,0,0.03737919926643372
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,4,1,128,1,float16,fp8,0,0.03892799913883209
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,4,1,128,1,fp8,fp8,0,0.038971200585365295
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,4,2,128,1,float16,float16,0,0.05375199913978577
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,4,2,128,1,float16,fp8,0,0.05597119927406311
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,4,2,128,1,fp8,fp8,0,0.05481119751930237
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,4,4,128,1,float16,float16,0,0.049595201015472413
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,4,4,128,1,float16,fp8,0,0.052020800113677976
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,4,4,128,1,fp8,fp8,0,0.05256159901618958
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,4,1,128,1,float16,float16,0,0.02670240104198456
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,4,1,128,1,float16,fp8,0,0.02754719853401184
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,4,1,128,1,fp8,fp8,0,0.02741760015487671
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,4,2,128,1,float16,float16,0,0.034694400429725644
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,4,2,128,1,float16,fp8,0,0.036103999614715575
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,4,2,128,1,fp8,fp8,0,0.03617759943008423
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,4,4,128,1,float16,float16,0,0.0328575998544693
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,4,4,128,1,float16,fp8,0,0.03474400043487549
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,4,4,128,1,fp8,fp8,0,0.0345984011888504
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,4,1,128,1,float16,float16,0,0.023756800591945647
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,4,1,128,1,float16,fp8,0,0.0248879998922348
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,4,1,128,1,fp8,fp8,0,0.024766400456428528
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,4,2,128,1,float16,float16,0,0.02494560033082962
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,4,2,128,1,float16,fp8,0,0.025646400451660157
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,4,2,128,1,fp8,fp8,0,0.025622400641441345
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,4,4,128,1,float16,float16,0,0.024060800671577454
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,4,4,128,1,float16,fp8,0,0.025006398558616638
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,4,4,128,1,fp8,fp8,0,0.025003200769424437
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,4,1,128,1,float16,float16,0,0.022915199398994446
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,4,1,128,1,float16,fp8,0,0.0236175999045372
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,4,1,128,1,fp8,fp8,0,0.023763200640678404
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,4,2,128,1,float16,float16,0,0.022910399734973906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,4,2,128,1,float16,fp8,0,0.02380480021238327
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,4,2,128,1,fp8,fp8,0,0.02385440021753311
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,4,4,128,1,float16,float16,0,0.022758400440216063
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,4,4,128,1,float16,fp8,0,0.023446400463581086
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,4,4,128,1,fp8,fp8,0,0.023579199612140656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,4,1,128,1,float16,float16,0,0.02191520035266876
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,4,1,128,1,float16,fp8,0,0.022648000717163087
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,4,1,128,1,fp8,fp8,0,0.02292640060186386
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,4,2,128,1,float16,float16,0,0.02216159999370575
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,4,2,128,1,float16,fp8,0,0.023160000145435334
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,4,2,128,1,fp8,fp8,0,0.02313600033521652
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,4,4,128,1,float16,float16,0,0.020916800200939178
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,4,4,128,1,float16,fp8,0,0.021695999801158904
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,4,4,128,1,fp8,fp8,0,0.021638399362564086
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,4,1,128,1,float16,float16,0,0.020667199790477753
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,4,1,128,1,float16,fp8,0,0.02142080068588257
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,4,1,128,1,fp8,fp8,0,0.021558399498462676
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,4,2,128,1,float16,float16,0,0.020979200303554536
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,4,2,128,1,float16,fp8,0,0.021670399606227873
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,4,2,128,1,fp8,fp8,0,0.02155199944972992
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,4,4,128,1,float16,float16,0,0.019940799474716185
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,4,4,128,1,float16,fp8,0,0.020503999292850496
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,4,4,128,1,fp8,fp8,0,0.02046400010585785
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,4,1,128,1,float16,float16,0,0.019681599736213685
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,4,1,128,1,float16,fp8,0,0.020795199275016784
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,4,1,128,1,fp8,fp8,0,0.020843200385570526
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,4,2,128,1,float16,float16,0,0.019550399482250215
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,4,2,128,1,float16,fp8,0,0.020313599705696107
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,4,2,128,1,fp8,fp8,0,0.020337599515914916
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,4,4,128,1,float16,float16,0,0.019305600225925444
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,4,4,128,1,float16,fp8,0,0.020318399369716644
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,4,4,128,1,fp8,fp8,0,0.020417599380016326
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,4,1,128,1,float16,float16,0,0.019340799748897554
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,4,1,128,1,float16,fp8,0,0.02033119946718216
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,4,1,128,1,fp8,fp8,0,0.020190399885177613
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,4,2,128,1,float16,float16,0,0.019156800210475923
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,4,2,128,1,float16,fp8,0,0.02022079974412918
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,4,2,128,1,fp8,fp8,0,0.0199535995721817
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,4,4,128,1,float16,float16,0,0.019407999515533448
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,4,4,128,1,float16,fp8,0,0.02022400051355362
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,4,4,128,1,fp8,fp8,0,0.02012320011854172
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,4,1,128,1,float16,float16,0,0.01921280026435852
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,4,1,128,1,float16,fp8,0,0.02003999948501587
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,4,1,128,1,fp8,fp8,0,0.02027679979801178
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,4,2,128,1,float16,float16,0,0.01932159960269928
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,4,2,128,1,float16,fp8,0,0.019969600439071655
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,4,2,128,1,fp8,fp8,0,0.02011519968509674
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,4,1,128,1,float16,float16,0,0.032532799243927005
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,4,1,128,1,float16,fp8,0,0.03414719998836517
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,4,1,128,1,fp8,fp8,0,0.03452959954738617
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,4,2,128,1,float16,float16,0,0.044084799289703366
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,4,2,128,1,float16,fp8,0,0.04695200026035309
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,4,1,128,1,fp8,fp8,0,0.025974398851394652
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,4,2,128,1,fp8,fp8,0,0.04755040109157562
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,4,4,128,1,float16,float16,0,0.04156160056591034
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,4,4,128,1,float16,fp8,0,0.044968000054359435
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,4,4,128,1,fp8,fp8,0,0.04470719993114471
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,4,1,128,1,float16,float16,0,0.025155198574066163
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,4,1,128,1,float16,fp8,0,0.026017600297927858
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,4,2,128,1,float16,float16,0,0.030112001299858093
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,4,2,128,1,float16,fp8,0,0.031323200464248656
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,4,2,128,1,fp8,fp8,0,0.03146879971027374
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,4,2,128,1,float16,float16,0,0.024107199907302857
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,4,4,128,1,float16,float16,0,0.028492799401283263
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,4,4,128,1,float16,fp8,0,0.030251199007034303
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,4,4,128,1,fp8,fp8,0,0.030124801397323608
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,4,1,128,1,float16,float16,0,0.023398399353027344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,4,1,128,1,float16,fp8,0,0.02423039972782135
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,4,1,128,1,fp8,fp8,0,0.024115200340747833
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,4,2,128,1,float16,fp8,0,0.02476319968700409
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,4,2,128,1,fp8,fp8,0,0.024801599979400634
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,4,4,128,1,float16,float16,0,0.02316959947347641
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,4,4,128,1,float16,fp8,0,0.024087999761104584
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,4,4,128,1,fp8,fp8,0,0.023928000032901763
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,4,1,128,1,float16,float16,0,0.022595199942588805
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,4,1,128,1,float16,fp8,0,0.023611199855804444
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,4,1,128,1,fp8,fp8,0,0.02340800017118454
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,4,2,128,1,float16,float16,0,0.02274879962205887
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,4,2,128,1,float16,fp8,0,0.023665599524974823
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,4,2,128,1,fp8,fp8,0,0.023633599281311035
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,4,4,128,1,float16,float16,0,0.02110559940338135
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,4,4,128,1,float16,fp8,0,0.021916800737380983
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,4,4,128,1,fp8,fp8,0,0.02221920043230057
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,4,1,128,1,float16,float16,0,0.020636799931526183
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,4,1,128,1,float16,fp8,0,0.021587200462818146
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,4,1,128,1,fp8,fp8,0,0.021620799601078034
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,4,2,128,1,float16,float16,0,0.02101760059595108
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,4,2,128,1,float16,fp8,0,0.021723200380802155
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,4,2,128,1,fp8,fp8,0,0.021860800683498383
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,4,4,128,1,float16,float16,0,0.020236800611019134
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,4,4,128,1,float16,fp8,0,0.021388800442218782
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,4,4,128,1,fp8,fp8,0,0.021160000562667848
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,4,1,128,1,float16,float16,0,0.019724799692630766
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,4,1,128,1,float16,fp8,0,0.020558400452136992
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,4,1,128,1,fp8,fp8,0,0.02040639966726303
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,4,2,128,1,float16,float16,0,0.019998399913311003
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,4,2,128,1,float16,fp8,0,0.02083359956741333
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,4,2,128,1,fp8,fp8,0,0.020584000647068022
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,4,1,128,1,fp8,fp8,0,0.020212799310684204
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,4,4,128,1,float16,float16,0,0.0196383997797966
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,4,4,128,1,float16,fp8,0,0.020265600085258482
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,4,4,128,1,fp8,fp8,0,0.020252799987792967
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,4,1,128,1,float16,float16,0,0.019275200366973878
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,4,1,128,1,float16,fp8,0,0.02004159986972809
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,4,2,128,1,float16,float16,0,0.019513599574565887
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,4,2,128,1,float16,fp8,0,0.020153599977493285
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,4,2,128,1,fp8,fp8,0,0.020343999564647674
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,4,1,128,1,fp8,fp8,0,0.019966399669647215
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,4,4,128,1,float16,float16,0,0.01919520050287247
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,4,4,128,1,float16,fp8,0,0.02014240026473999
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,4,4,128,1,fp8,fp8,0,0.020175999402999877
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,4,1,128,1,float16,float16,0,0.01913760006427765
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,4,1,128,1,float16,fp8,0,0.019923199713230134
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,4,2,128,1,float16,float16,0,0.019398400187492372
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,4,2,128,1,float16,fp8,0,0.020046399533748628
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,4,2,128,1,fp8,fp8,0,0.019843199849128725
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,4,4,128,1,float16,float16,0,0.019176000356674196
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,4,4,128,1,float16,fp8,0,0.020099200308322906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,4,4,128,1,fp8,fp8,0,0.020078399777412416
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,4,1,128,1,float16,float16,0,0.02949439883232117
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,4,2,128,1,fp8,fp8,0,0.01998240053653717
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,4,1,128,1,float16,float16,0,0.019300800561904908
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,4,1,128,1,float16,fp8,0,0.019888000190258028
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,4,1,128,1,fp8,fp8,0,0.019963200390338897
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,4,2,128,1,float16,float16,0,0.019046400487422944
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,4,2,128,1,float16,fp8,0,0.01989919990301132
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,4,1,128,1,float16,fp8,0,0.03102560043334961
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,4,1,128,1,fp8,fp8,0,0.0312032014131546
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,4,2,128,1,float16,float16,0,0.037231999635696414
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,4,2,128,1,float16,fp8,0,0.04016480147838593
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,4,2,128,1,fp8,fp8,0,0.04037919938564301
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,4,4,128,1,float16,float16,0,0.03410240113735199
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,4,4,128,1,float16,fp8,0,0.037302398681640626
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,4,4,128,1,fp8,fp8,0,0.03740000128746033
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,4,1,128,1,float16,float16,0,0.022551999986171724
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,4,1,128,1,float16,fp8,0,0.023345600068569183
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,4,1,128,1,fp8,fp8,0,0.023948800563812257
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,4,2,128,1,float16,float16,0,0.02670240104198456
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,4,2,128,1,float16,fp8,0,0.028484800457954408
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,4,2,128,1,fp8,fp8,0,0.027953600883483885
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,4,4,128,1,float16,float16,0,0.024897600710391998
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,4,4,128,1,float16,fp8,0,0.026716798543930054
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,4,4,128,1,fp8,fp8,0,0.026652801036834716
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,4,1,128,1,float16,float16,0,0.020763200521469117
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,4,1,128,1,float16,fp8,0,0.02187040001153946
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,4,1,128,1,fp8,fp8,0,0.02192640006542206
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,4,2,128,1,float16,float16,0,0.021536000072956085
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,4,2,128,1,float16,fp8,0,0.02220959961414337
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,4,2,128,1,fp8,fp8,0,0.021960000693798064
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,4,4,128,1,float16,float16,0,0.020524799823760986
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,4,4,128,1,float16,fp8,0,0.02126079946756363
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,4,4,128,1,fp8,fp8,0,0.021241599321365358
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,4,1,128,1,float16,float16,0,0.02014880031347275
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,4,1,128,1,float16,fp8,0,0.020824000239372253
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,4,1,128,1,fp8,fp8,0,0.020857599377632142
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,4,2,128,1,float16,float16,0,0.02019840031862259
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,4,2,128,1,float16,fp8,0,0.021078400313854218
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,4,2,128,1,fp8,fp8,0,0.02111999988555908
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,4,4,128,1,float16,float16,0,0.019705599546432494
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,4,4,128,1,float16,fp8,0,0.020440000295639037
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,4,4,128,1,fp8,fp8,0,0.0208079993724823
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,4,1,128,1,float16,float16,0,0.019543999433517457
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,4,1,128,1,float16,fp8,0,0.020398400723934174
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,4,1,128,1,fp8,fp8,0,0.0203232005238533
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,4,2,128,1,float16,float16,0,0.01945440024137497
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,4,2,128,1,float16,fp8,0,0.02033279985189438
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,4,2,128,1,fp8,fp8,0,0.020399999618530274
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,4,4,128,1,float16,float16,0,0.019438399374485014
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,4,4,128,1,float16,fp8,0,0.02032800018787384
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,4,4,128,1,fp8,fp8,0,0.020390400290489198
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,4,1,128,1,float16,float16,0,0.019308799505233766
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,4,1,128,1,float16,fp8,0,0.020073600113391876
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,4,1,128,1,fp8,fp8,0,0.020003199577331543
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,4,2,128,1,float16,float16,0,0.01895360052585602
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,4,2,128,1,float16,fp8,0,0.02006080001592636
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,4,2,128,1,fp8,fp8,0,0.020054399967193604
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,4,4,128,1,float16,float16,0,0.019012799859046935
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,4,4,128,1,float16,fp8,0,0.020076799392700195
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,4,4,128,1,fp8,fp8,0,0.02014240026473999
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,4,1,128,1,float16,float16,0,0.018916800618171692
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,4,1,128,1,float16,fp8,0,0.019713599979877473
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,4,1,128,1,fp8,fp8,0,0.019708800315856933
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,4,2,128,1,float16,float16,0,0.019067199528217317
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,4,2,128,1,float16,fp8,0,0.01961120069026947
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,4,2,128,1,fp8,fp8,0,0.019860799610614776
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,4,4,128,1,float16,float16,0,0.019079999625682832
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,4,4,128,1,float16,fp8,0,0.01997919976711273
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,4,4,128,1,fp8,fp8,0,0.01974080055952072
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,4,1,128,1,float16,float16,0,0.01740639954805374
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,4,1,128,1,float16,fp8,0,0.017785599827766417
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,4,1,128,1,fp8,fp8,0,0.017734399437904357
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,4,2,128,1,float16,float16,0,0.018753600120544434
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,4,2,128,1,float16,fp8,0,0.019840000569820403
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,4,2,128,1,fp8,fp8,0,0.01972319930791855
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,4,4,128,1,float16,float16,0,0.018760000169277192
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,4,4,128,1,float16,fp8,0,0.01964640021324158
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,4,4,128,1,fp8,fp8,0,0.019521600008010863
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,4,1,128,1,float16,float16,0,0.016113600134849547
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,4,1,128,1,float16,fp8,0,0.017044800519943237
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,4,1,128,1,fp8,fp8,0,0.017291200160980225
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,4,2,128,1,float16,float16,0,0.017192000150680543
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,4,2,128,1,float16,fp8,0,0.01791519969701767
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,4,2,128,1,fp8,fp8,0,0.017961600422859193
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,2,2,128,1,float16,float16,0,1.011070442199707
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,2,1,128,1,float16,float16,0,1.865132713317871
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,2,2,128,1,float16,fp8,0,0.9729503631591797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,2,1,128,1,float16,fp8,0,1.7303855895996094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,2,2,128,1,fp8,fp8,0,0.9714960098266602
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16384,2,1,128,1,fp8,fp8,0,1.7388479232788085
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,2,1,128,1,float16,float16,0,0.9268783569335938
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,2,1,128,1,float16,fp8,0,0.891756820678711
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,2,2,128,1,float16,float16,0,0.5181471824645996
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,2,1,128,1,fp8,fp8,0,0.9340831756591796
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,2,2,128,1,float16,fp8,0,0.4999536037445068
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,2,2,128,1,fp8,fp8,0,0.5021440029144287
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,2,1,128,1,float16,float16,0,0.479094409942627
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,2,1,128,1,float16,fp8,0,0.46051201820373533
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,2,1,128,1,fp8,fp8,0,0.4523183822631836
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,2,2,128,1,float16,float16,0,0.2694528102874756
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,2,2,128,1,float16,fp8,0,0.2574527978897095
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,2,2,128,1,fp8,fp8,0,0.2566431999206543
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,2,1,128,1,float16,float16,0,0.24089760780334474
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,2,1,128,1,float16,fp8,0,0.23742880821228027
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,2,1,128,1,fp8,fp8,0,0.2410383939743042
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,2,1,128,1,float16,float16,0,1.100817584991455
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,2,1,128,1,float16,fp8,0,1.0494959831237793
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,2,2,128,1,float16,float16,0,0.6584991931915283
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,2,2,128,1,float16,fp8,0,0.6044064044952393
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,12288,2,1,128,1,fp8,fp8,0,1.0493056297302246
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,2,2,128,1,fp8,fp8,0,0.605014419555664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,2,1,128,1,float16,float16,0,0.5719711780548096
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,2,1,128,1,float16,fp8,0,0.5571008205413819
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,2,2,128,1,float16,float16,0,0.3640255928039551
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,2,1,128,1,fp8,fp8,0,0.5723167896270752
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,2,2,128,1,float16,fp8,0,0.3243135929107666
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,2,2,128,1,fp8,fp8,0,0.3197088003158569
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,2,1,128,1,float16,float16,0,0.30776801109313967
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,2,1,128,1,float16,fp8,0,0.29501440525054934
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,2,1,128,1,fp8,fp8,0,0.2896176099777222
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,2,2,128,1,float16,float16,0,0.20470080375671387
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,2,2,128,1,float16,fp8,0,0.19539999961853027
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,2,2,128,1,fp8,fp8,0,0.1967728018760681
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,2,1,128,1,float16,float16,0,0.1857599973678589
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,2,1,128,1,float16,float16,0,0.79344801902771
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,2,1,128,1,float16,fp8,0,0.765556812286377
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,2,1,128,1,float16,fp8,0,0.182532799243927
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,2,1,128,1,fp8,fp8,0,0.1810479998588562
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,10240,2,1,128,1,fp8,fp8,0,0.7754079818725585
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,2,2,128,1,float16,float16,0,0.4962592124938965
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,2,2,128,1,float16,fp8,0,0.4495039939880371
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,2,2,128,1,fp8,fp8,0,0.4522079944610596
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,2,1,128,1,float16,float16,0,0.417955207824707
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,2,1,128,1,float16,fp8,0,0.4054175853729248
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,2,2,128,1,float16,float16,0,0.28910720348358154
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,2,1,128,1,fp8,fp8,0,0.4057119846343994
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,2,2,128,1,float16,fp8,0,0.2392575979232788
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,2,2,128,1,fp8,fp8,0,0.23740320205688475
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,2,1,128,1,float16,float16,0,0.22465600967407226
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,2,1,128,1,float16,fp8,0,0.21261279582977294
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,2,1,128,1,fp8,fp8,0,0.21041278839111327
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,2,2,128,1,float16,float16,0,0.16832159757614135
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,2,2,128,1,float16,fp8,0,0.16382880210876466
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,2,2,128,1,fp8,fp8,0,0.16396160125732423
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,2,1,128,1,float16,float16,0,0.15771360397338868
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,2,1,128,1,float16,fp8,0,0.1533967971801758
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,2,1,128,1,fp8,fp8,0,0.15215200185775757
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,2,1,128,1,float16,float16,0,1.0409152030944824
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,2,1,128,1,float16,fp8,0,0.9858816146850586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,2,2,128,1,float16,float16,0,0.6059648036956787
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,8192,2,1,128,1,fp8,fp8,0,0.9921072006225586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,2,2,128,1,float16,fp8,0,0.5794928073883057
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,2,2,128,1,fp8,fp8,0,0.5791247844696045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,2,1,128,1,float16,float16,0,0.5300399780273437
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,2,1,128,1,float16,fp8,0,0.5106128215789795
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,2,1,128,1,fp8,fp8,0,0.5227312088012696
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,2,2,128,1,float16,float16,0,0.310918402671814
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,2,2,128,1,float16,fp8,0,0.3032399892807007
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,2,2,128,1,fp8,fp8,0,0.3021807909011841
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,2,1,128,1,float16,float16,0,0.27663359642028806
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,2,1,128,1,float16,fp8,0,0.2649087905883789
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,2,1,128,1,float16,fp8,0,0.13829280138015748
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,2,1,128,1,fp8,fp8,0,0.26375041007995603
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,2,2,128,1,float16,float16,0,0.1676800012588501
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,2,2,128,1,float16,fp8,0,0.15591360330581666
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,2,2,128,1,fp8,fp8,0,0.1548383951187134
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,2,1,128,1,float16,float16,0,0.14256800413131715
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,2,1,128,1,fp8,fp8,0,0.138481605052948
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,2,2,128,1,float16,float16,0,0.1352911949157715
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,2,2,128,1,float16,fp8,0,0.1340783953666687
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,2,2,128,1,fp8,fp8,0,0.1335536003112793
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,2,1,128,1,float16,float16,0,0.12724640369415283
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,2,1,128,1,float16,fp8,0,0.12713600397109986
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,2,1,128,1,fp8,fp8,0,0.12742079496383668
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,2,1,128,1,float16,float16,0,0.6769135951995849
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,2,1,128,1,float16,fp8,0,0.651635217666626
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,6144,2,1,128,1,fp8,fp8,0,0.6519839763641357
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,2,2,128,1,float16,float16,0,0.4179503917694092
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,2,2,128,1,float16,fp8,0,0.4057040214538574
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,2,1,128,1,fp8,fp8,0,0.35248000621795655
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,2,2,128,1,fp8,fp8,0,0.407916784286499
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,2,1,128,1,float16,float16,0,0.36204960346221926
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,2,1,128,1,float16,fp8,0,0.35357279777526857
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,2,2,128,1,float16,float16,0,0.2310192108154297
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,2,2,128,1,float16,fp8,0,0.22020480632781983
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,2,2,128,1,fp8,fp8,0,0.22371039390563965
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,2,1,128,1,float16,float16,0,0.20310399532318116
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,2,1,128,1,float16,fp8,0,0.19203200340270996
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,2,1,128,1,fp8,fp8,0,0.19372479915618895
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,2,2,128,1,float16,float16,0,0.12939679622650146
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,2,2,128,1,float16,fp8,0,0.12286239862442017
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,2,2,128,1,fp8,fp8,0,0.12278239727020264
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,2,1,128,1,float16,float16,0,0.11349120140075683
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,2,1,128,1,float16,fp8,0,0.11210880279541016
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,2,1,128,1,fp8,fp8,0,0.11201280355453491
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,2,2,128,1,float16,float16,0,0.09050080180168152
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,2,2,128,1,float16,fp8,0,0.08835359811782836
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,2,2,128,1,fp8,fp8,0,0.0886352002620697
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,2,1,128,1,float16,float16,0,0.07439200282096863
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,2,1,128,1,float16,fp8,0,0.07348960041999816
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,2,1,128,1,fp8,fp8,0,0.0747488021850586
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,2,1,128,1,float16,fp8,0,0.6567984104156495
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,2,1,128,1,float16,float16,0,0.6797760009765625
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,2,1,128,1,float16,float16,0,0.3633984088897705
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,2,2,128,1,float16,float16,0,0.43944640159606935
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,4096,2,1,128,1,fp8,fp8,0,0.6565936088562012
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,2,2,128,1,float16,fp8,0,0.42363681793212893
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,2,2,128,1,fp8,fp8,0,0.4228544235229492
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,2,1,128,1,float16,fp8,0,0.35166239738464355
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,2,1,128,1,fp8,fp8,0,0.3514240026473999
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,2,2,128,1,float16,float16,0,0.24410240650177
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,2,2,128,1,float16,fp8,0,0.23450400829315185
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,2,2,128,1,fp8,fp8,0,0.23502719402313232
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,2,1,128,1,float16,float16,0,0.20554718971252442
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,2,1,128,1,float16,fp8,0,0.19725600481033326
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,2,1,128,1,fp8,fp8,0,0.19815679788589477
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,2,2,128,1,float16,float16,0,0.14450880289077758
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,2,2,128,1,float16,fp8,0,0.13254400491714477
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,2,2,128,1,fp8,fp8,0,0.1337839961051941
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,2,1,128,1,float16,float16,0,0.12190079689025879
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,2,1,128,1,float16,fp8,0,0.11745760440826417
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,2,1,128,1,fp8,fp8,0,0.11773920059204102
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,2,2,128,1,float16,float16,0,0.08834879994392394
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,2,2,128,1,float16,fp8,0,0.08862559795379639
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,2,2,128,1,fp8,fp8,0,0.08861759901046753
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,2,1,128,1,float16,float16,0,0.08043839931488037
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,2,1,128,1,float16,fp8,0,0.08061599731445312
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,2,1,128,1,fp8,fp8,0,0.08123199939727783
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,2,2,128,1,float16,float16,0,0.06066399812698364
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,2,2,128,1,float16,fp8,0,0.060734397172927855
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,2,2,128,1,fp8,fp8,0,0.06026399731636047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,2,1,128,1,float16,float16,0,0.0557424008846283
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,2,1,128,1,float16,fp8,0,0.05590559840202332
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,2,1,128,1,fp8,fp8,0,0.05581120252609253
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,2,1,128,1,float16,float16,0,0.44307680130004884
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,2,1,128,1,float16,fp8,0,0.42884159088134766
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,3072,2,1,128,1,fp8,fp8,0,0.42960801124572756
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,2,2,128,1,float16,float16,0,0.30000479221343995
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,2,2,128,1,float16,fp8,0,0.2909552097320557
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,2,2,128,1,fp8,fp8,0,0.291759991645813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,2,1,128,1,float16,float16,0,0.24447999000549317
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,2,1,128,1,float16,fp8,0,0.23615679740905762
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,2,1,128,1,fp8,fp8,0,0.23526558876037598
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,2,2,128,1,float16,float16,0,0.17103519439697265
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,2,2,128,1,float16,fp8,0,0.16522560119628907
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,2,2,128,1,fp8,fp8,0,0.16505600214004518
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,2,1,128,1,float16,float16,0,0.14091839790344238
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,2,1,128,1,float16,fp8,0,0.1321760058403015
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,2,1,128,1,fp8,fp8,0,0.13255679607391357
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,2,2,128,1,float16,float16,0,0.09798880219459534
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,2,2,128,1,float16,fp8,0,0.0946672022342682
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,2,2,128,1,fp8,fp8,0,0.09510400295257568
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,2,1,128,1,float16,float16,0,0.08217599987983704
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,2,1,128,1,float16,fp8,0,0.08276640176773072
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,2,1,128,1,fp8,fp8,0,0.08284959793090821
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,2,2,128,1,float16,float16,0,0.07171199917793274
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,2,2,128,1,float16,fp8,0,0.07124959826469421
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,2,2,128,1,fp8,fp8,0,0.07187359929084777
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,2,1,128,1,float16,float16,0,0.060945600271224976
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,2,1,128,1,float16,fp8,0,0.061689597368240354
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,2,1,128,1,fp8,fp8,0,0.06177279949188232
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,2,2,128,1,float16,float16,0,0.04451040029525757
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,2,2,128,1,float16,fp8,0,0.0454479992389679
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,2,2,128,1,fp8,fp8,0,0.04522719979286194
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,2,1,128,1,float16,float16,0,0.04200319945812225
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,2,1,128,1,float16,fp8,0,0.04275839924812317
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,2,1,128,1,fp8,fp8,0,0.04279040098190308
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,2,1,128,1,float16,float16,0,0.47362561225891114
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,2,1,128,1,float16,fp8,0,0.45928640365600587
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,2,2,128,1,float16,float16,0,0.3322688102722168
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,2048,2,1,128,1,fp8,fp8,0,0.459716796875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,2,2,128,1,float16,fp8,0,0.3220992088317871
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,2,2,128,1,fp8,fp8,0,0.32202560901641847
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,2,1,128,1,float16,float16,0,0.25503199100494384
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,2,1,128,1,float16,fp8,0,0.2479856014251709
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,2,2,128,1,float16,float16,0,0.18364319801330567
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,2,1,128,1,fp8,fp8,0,0.24920639991760254
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,2,2,128,1,float16,fp8,0,0.1794543981552124
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,2,2,128,1,fp8,fp8,0,0.17907359600067138
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,2,1,128,1,float16,float16,0,0.14697439670562745
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,2,1,128,1,float16,fp8,0,0.14113279581069946
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,2,1,128,1,fp8,fp8,0,0.14111039638519288
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,2,2,128,1,float16,float16,0,0.11052000522613525
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,2,2,128,1,float16,fp8,0,0.10061919689178467
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,2,2,128,1,fp8,fp8,0,0.10364320278167724
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,2,1,128,1,float16,float16,0,0.08701440095901489
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,2,1,128,1,float16,fp8,0,0.0843999981880188
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,2,1,128,1,fp8,fp8,0,0.08332800269126892
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,2,2,128,1,float16,float16,0,0.06261600255966186
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,2,2,128,1,float16,fp8,0,0.0630944013595581
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,2,2,128,1,fp8,fp8,0,0.063155198097229
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,2,1,128,1,float16,float16,0,0.055220800638198855
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,2,1,128,1,float16,fp8,0,0.0559328019618988
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,2,1,128,1,fp8,fp8,0,0.05577279925346375
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,2,2,128,1,float16,float16,0,0.04743840098381043
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,2,2,128,1,float16,fp8,0,0.04826880097389221
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,2,2,128,1,fp8,fp8,0,0.04838240146636963
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,2,1,128,1,float16,float16,0,0.04303199946880341
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,2,1,128,1,float16,fp8,0,0.04434239864349365
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,2,1,128,1,fp8,fp8,0,0.04434239864349365
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,2,2,128,1,float16,float16,0,0.03436320126056671
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,2,2,128,1,float16,fp8,0,0.03604960143566131
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,2,2,128,1,fp8,fp8,0,0.03591679930686951
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,2,1,128,1,float16,float16,0,0.03309119939804077
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,2,1,128,1,float16,fp8,0,0.035128000378608706
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,2,1,128,1,fp8,fp8,0,0.03523840010166168
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,2,1,128,1,float16,float16,0,0.3263808012008667
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,2,1,128,1,float16,fp8,0,0.3176800012588501
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1536,2,1,128,1,fp8,fp8,0,0.31789920330047605
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,2,2,128,1,float16,float16,0,0.23619520664215088
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,2,1,128,1,fp8,fp8,0,0.17665599584579467
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,2,2,128,1,float16,fp8,0,0.23116800785064698
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,2,2,128,1,fp8,fp8,0,0.23063039779663086
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,2,1,128,1,float16,float16,0,0.1797808051109314
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,2,2,128,1,float16,float16,0,0.1338736057281494
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,2,1,128,1,float16,fp8,0,0.176800000667572
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,2,2,128,1,float16,fp8,0,0.1308303952217102
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,2,2,128,1,float16,fp8,0,0.07646880149841309
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,2,2,128,1,fp8,fp8,0,0.12985759973526
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,2,1,128,1,float16,float16,0,0.10478240251541138
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,2,1,128,1,float16,fp8,0,0.09899200201034546
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,2,1,128,1,fp8,fp8,0,0.10046240091323852
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,2,2,128,1,float16,float16,0,0.07874079942703247
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,2,2,128,1,fp8,fp8,0,0.07587839961051941
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,2,1,128,1,float16,float16,0,0.06273760199546814
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,2,1,128,1,float16,fp8,0,0.06462879776954651
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,2,1,128,1,fp8,fp8,0,0.06435359716415405
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,2,2,128,1,float16,float16,0,0.059305602312088014
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,2,2,128,1,float16,fp8,0,0.05948160290718078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,2,2,128,1,fp8,fp8,0,0.059406399726867676
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,2,1,128,1,float16,float16,0,0.051316797733306885
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,2,1,128,1,float16,fp8,0,0.0529695987701416
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,2,1,128,1,fp8,fp8,0,0.05299519896507263
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,2,2,128,1,float16,float16,0,0.03963519930839539
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,2,2,128,1,float16,fp8,0,0.03952800035476685
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,2,2,128,1,fp8,fp8,0,0.03987680077552795
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,2,1,128,1,float16,float16,0,0.03545759916305542
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,2,1,128,1,float16,fp8,0,0.0375247985124588
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,2,1,128,1,fp8,fp8,0,0.03744640052318573
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,2,2,128,1,float16,float16,0,0.03126240074634552
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,2,2,128,1,float16,fp8,0,0.031339201331138614
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,2,2,128,1,fp8,fp8,0,0.03134720027446747
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,2,1,128,1,float16,float16,0,0.030511999130249025
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,2,1,128,1,float16,fp8,0,0.030876800417900085
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,2,1,128,1,fp8,fp8,0,0.030691200494766237
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,2,1,128,1,float16,float16,0,0.3693871974945068
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,2,1,128,1,float16,fp8,0,0.36375041007995607
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,2,2,128,1,float16,float16,0,0.27445919513702394
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1024,2,1,128,1,fp8,fp8,0,0.3645008087158203
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,2,2,128,1,float16,fp8,0,0.2715951919555664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,2,2,128,1,fp8,fp8,0,0.2717152118682861
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,2,1,128,1,float16,float16,0,0.1999168038368225
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,2,1,128,1,float16,fp8,0,0.1984495997428894
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,2,1,128,1,fp8,fp8,0,0.19838399887084962
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,2,2,128,1,float16,float16,0,0.15261440277099608
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,2,2,128,1,float16,fp8,0,0.1520159959793091
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,2,2,128,1,fp8,fp8,0,0.15233759880065917
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,2,1,128,1,float16,float16,0,0.11549279689788819
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,2,1,128,1,float16,fp8,0,0.11310240030288696
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,2,1,128,1,fp8,fp8,0,0.11352959871292115
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,2,2,128,1,float16,float16,0,0.0906831979751587
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,2,2,128,1,float16,fp8,0,0.08428800106048584
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,2,2,128,1,fp8,fp8,0,0.08538879752159119
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,2,1,128,1,float16,float16,0,0.06674559712409973
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,2,1,128,1,float16,fp8,0,0.06618720293045044
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,2,1,128,1,fp8,fp8,0,0.06672639846801758
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,2,2,128,1,float16,float16,0,0.05074080228805542
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,2,2,128,1,float16,fp8,0,0.05146560072898865
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,2,2,128,1,fp8,fp8,0,0.05146719813346863
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,2,1,128,1,float16,float16,0,0.04270719885826111
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,2,1,128,1,float16,fp8,0,0.04379999935626984
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,2,1,128,1,fp8,fp8,0,0.043673598766326906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,2,2,128,1,float16,float16,0,0.040862399339675906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,2,2,128,1,float16,fp8,0,0.042894399166107176
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,2,2,128,1,fp8,fp8,0,0.0426607996225357
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,2,1,128,1,float16,float16,0,0.03681280016899109
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,2,1,128,1,float16,fp8,0,0.03882879912853241
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,2,1,128,1,fp8,fp8,0,0.03871200084686279
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,2,2,128,1,float16,float16,0,0.0321152001619339
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,2,2,128,1,float16,fp8,0,0.03253439962863922
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,2,2,128,1,fp8,fp8,0,0.03260320127010345
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,2,1,128,1,float16,float16,0,0.03129439949989319
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,2,1,128,1,float16,fp8,0,0.031867200136184694
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,2,1,128,1,fp8,fp8,0,0.03155519962310791
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,2,2,128,1,float16,float16,0,0.02890079915523529
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,2,2,128,1,float16,fp8,0,0.03034079968929291
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,2,2,128,1,fp8,fp8,0,0.030275198817253112
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,2,1,128,1,float16,float16,0,0.0286624014377594
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,2,1,128,1,float16,fp8,0,0.029982399940490723
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,2,1,128,1,fp8,fp8,0,0.030118399858474733
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,2,1,128,1,float16,float16,0,0.2890559911727905
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,2,1,128,1,float16,fp8,0,0.2893615961074829
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,512,2,1,128,1,fp8,fp8,0,0.28942720890045165
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,2,2,128,1,float16,float16,0,0.2328864097595215
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,2,2,128,1,float16,fp8,0,0.23111839294433595
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,2,2,128,1,fp8,fp8,0,0.2314863920211792
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,2,1,128,1,float16,float16,0,0.15728960037231446
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,2,1,128,1,float16,fp8,0,0.15833760499954225
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,2,1,128,1,fp8,fp8,0,0.1582368016242981
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,2,2,128,1,float16,float16,0,0.12869919538497926
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,2,2,128,1,float16,fp8,0,0.1293936014175415
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,2,2,128,1,fp8,fp8,0,0.12914880514144897
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,2,1,128,1,float16,float16,0,0.09193440079689026
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,2,1,128,1,float16,fp8,0,0.09070879817008973
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,2,1,128,1,fp8,fp8,0,0.08977919816970825
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,2,2,128,1,float16,float16,0,0.07608640193939209
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,2,2,128,1,float16,fp8,0,0.07028319835662841
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,2,2,128,1,fp8,fp8,0,0.0696943998336792
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,2,1,128,1,float16,float16,0,0.05279039740562439
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,2,1,128,1,float16,fp8,0,0.05209760069847107
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,2,2,128,1,float16,float16,0,0.04206559956073761
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,2,1,128,1,fp8,fp8,0,0.0522383987903595
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,2,2,128,1,float16,fp8,0,0.04251199960708618
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,2,2,128,1,fp8,fp8,0,0.04268800020217896
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,2,1,128,1,float16,float16,0,0.033542400598526
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,2,1,128,1,float16,fp8,0,0.03516800105571747
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,2,1,128,1,fp8,fp8,0,0.03527039885520935
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,2,2,128,1,float16,float16,0,0.0330128014087677
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,2,2,128,1,float16,fp8,0,0.03512159883975983
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,2,2,128,1,fp8,fp8,0,0.03503359854221344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,2,1,128,1,float16,float16,0,0.02860960066318512
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,2,1,128,1,float16,fp8,0,0.030486398935317995
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,2,1,128,1,fp8,fp8,0,0.030532801151275636
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,2,2,128,1,float16,float16,0,0.028233599662780762
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,2,2,128,1,float16,fp8,0,0.03057279884815216
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,2,2,128,1,fp8,fp8,0,0.030436798930168152
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,2,1,128,1,float16,float16,0,0.02731359899044037
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,2,1,128,1,float16,fp8,0,0.029020801186561584
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,2,1,128,1,fp8,fp8,0,0.02905920147895813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,2,2,128,1,float16,float16,0,0.02735520005226135
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,2,2,128,1,float16,fp8,0,0.029105600714683533
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,2,2,128,1,fp8,fp8,0,0.02919520139694214
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,2,1,128,1,float16,float16,0,0.026510399580001832
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,2,1,128,1,float16,fp8,0,0.02869119942188263
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,2,1,128,1,fp8,fp8,0,0.029080000519752503
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,2,2,128,1,float16,float16,0,0.026345598697662353
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,2,2,128,1,float16,fp8,0,0.02855679988861084
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,2,2,128,1,fp8,fp8,0,0.028406399488449096
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,2,1,128,1,float16,float16,0,0.026481598615646362
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,2,1,128,1,float16,fp8,0,0.02826400101184845
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,2,1,128,1,fp8,fp8,0,0.028670400381088257
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,2,1,128,1,float16,float16,0,0.14549280405044557
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,2,1,128,1,float16,fp8,0,0.14795680046081544
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,256,2,1,128,1,fp8,fp8,0,0.14714879989624025
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,2,2,128,1,float16,float16,0,0.12293920516967774
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,2,2,128,1,float16,fp8,0,0.12006239891052246
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,2,2,128,1,fp8,fp8,0,0.12004319429397584
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,2,1,128,1,float16,float16,0,0.08465759754180908
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,2,1,128,1,float16,fp8,0,0.0813696026802063
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,2,1,128,1,fp8,fp8,0,0.08259519934654236
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,2,2,128,1,float16,float16,0,0.07119200229644776
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,2,2,128,1,float16,fp8,0,0.06409919857978821
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,2,2,128,1,fp8,fp8,0,0.06324959993362426
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,2,1,128,1,float16,float16,0,0.047214400768280027
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,2,1,128,1,float16,fp8,0,0.04727199971675873
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,2,1,128,1,fp8,fp8,0,0.04746879935264588
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,2,2,128,1,float16,float16,0,0.039743998646736146
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,2,2,128,1,float16,fp8,0,0.03938240110874176
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,2,2,128,1,fp8,fp8,0,0.039540800452232364
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,2,1,128,1,float16,float16,0,0.03070560097694397
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,2,1,128,1,float16,fp8,0,0.03194240033626557
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,2,1,128,1,fp8,fp8,0,0.03216480016708374
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,2,2,128,1,float16,float16,0,0.03017919957637787
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,2,2,128,1,float16,fp8,0,0.031112000346183777
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,2,2,128,1,fp8,fp8,0,0.031159999966621398
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,2,1,128,1,float16,float16,0,0.025670400261878966
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,2,1,128,1,float16,fp8,0,0.027136000990867614
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,2,1,128,1,fp8,fp8,0,0.026844799518585205
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,2,2,128,1,float16,float16,0,0.025574401021003723
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,2,2,128,1,float16,fp8,0,0.02646079957485199
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,2,2,128,1,fp8,fp8,0,0.0268528014421463
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,2,1,128,1,float16,float16,0,0.024396799504756927
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,2,1,128,1,float16,fp8,0,0.025777599215507506
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,2,1,128,1,fp8,fp8,0,0.025411200523376466
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,2,2,128,1,float16,float16,0,0.02396479994058609
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,2,2,128,1,float16,fp8,0,0.025220799446105956
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,2,2,128,1,fp8,fp8,0,0.02515999972820282
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,2,1,128,1,float16,float16,0,0.023721599578857423
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,2,1,128,1,float16,fp8,0,0.024822400510311128
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,2,1,128,1,fp8,fp8,0,0.024798400700092316
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,2,2,128,1,float16,float16,0,0.023212799429893495
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,2,2,128,1,float16,fp8,0,0.024961599707603456
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,2,2,128,1,fp8,fp8,0,0.024435199797153473
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,2,1,128,1,float16,float16,0,0.023315200209617616
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,2,1,128,1,float16,fp8,0,0.02447360008955002
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,2,1,128,1,fp8,fp8,0,0.024831999838352204
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,2,2,128,1,float16,float16,0,0.022407999634742735
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,2,2,128,1,float16,fp8,0,0.02314240038394928
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,2,2,128,1,fp8,fp8,0,0.02359199970960617
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,2,1,128,1,float16,float16,0,0.022023999691009523
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,2,1,128,1,float16,fp8,0,0.023233599960803986
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,2,1,128,1,fp8,fp8,0,0.0234047994017601
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,2,1,128,1,float16,float16,0,0.08329280018806458
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,2,1,128,1,float16,fp8,0,0.0788703978061676
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,128,2,1,128,1,fp8,fp8,0,0.07904639840126038
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,2,2,128,1,float16,float16,0,0.06966720223426819
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,2,2,128,1,float16,fp8,0,0.060862398147583006
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,2,2,128,1,fp8,fp8,0,0.06007680296897888
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,2,1,128,1,float16,float16,0,0.04543200135231018
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,2,1,128,1,float16,fp8,0,0.04420160055160523
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,2,1,128,1,fp8,fp8,0,0.044073599576950076
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,2,2,128,1,float16,float16,0,0.037945601344108584
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,2,2,128,1,float16,fp8,0,0.038201600313186646
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,2,2,128,1,fp8,fp8,0,0.037662398815155027
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,2,1,128,1,float16,float16,0,0.03028639853000641
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,2,1,128,1,fp8,fp8,0,0.030788800120353697
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,2,2,128,1,float16,float16,0,0.02905279994010925
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,2,1,128,1,float16,float16,0,0.024691200256347655
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,2,1,128,1,float16,fp8,0,0.030852800607681273
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,2,2,128,1,fp8,fp8,0,0.029635199904441835
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,2,2,128,1,float16,fp8,0,0.02951360046863556
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,2,1,128,1,float16,fp8,0,0.025036799907684325
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,2,1,128,1,fp8,fp8,0,0.025070399045944214
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,2,2,128,1,float16,float16,0,0.024545599520206452
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,2,2,128,1,float16,fp8,0,0.025064000487327577
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,2,2,128,1,fp8,fp8,0,0.024369600415229797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,2,1,128,1,float16,float16,0,0.022888000309467315
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,2,1,128,1,float16,fp8,0,0.023713600635528565
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,2,1,128,1,fp8,fp8,0,0.023556800186634065
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,2,2,128,1,float16,float16,0,0.022755199670791627
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,2,1,128,1,float16,fp8,0,0.022628800570964815
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,2,2,128,1,fp8,fp8,0,0.023227199912071228
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,2,2,128,1,float16,fp8,0,0.023156799376010895
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,2,1,128,1,float16,float16,0,0.022327999770641326
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,2,1,128,1,fp8,fp8,0,0.02306240051984787
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,2,2,128,1,float16,float16,0,0.021991999447345735
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,2,2,128,1,float16,float16,0,0.020670400559902193
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,2,2,128,1,float16,fp8,0,0.022276799380779266
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,2,1,128,1,float16,fp8,0,0.02264160066843033
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,2,2,128,1,fp8,fp8,0,0.022486400604248048
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,2,1,128,1,float16,float16,0,0.021860800683498383
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,2,1,128,1,fp8,fp8,0,0.022251200675964356
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,2,1,128,1,float16,float16,0,0.020612800121307374
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,2,2,128,1,float16,fp8,0,0.02158239930868149
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,2,2,128,1,fp8,fp8,0,0.021721599996089934
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,2,1,128,1,float16,fp8,0,0.021044799685478212
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,2,1,128,1,fp8,fp8,0,0.021374399960041045
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,2,2,128,1,float16,float16,0,0.01987359970808029
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,2,2,128,1,float16,fp8,0,0.020510399341583253
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,2,2,128,1,fp8,fp8,0,0.020747199654579163
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,2,1,128,1,float16,float16,0,0.019617600739002226
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,2,1,128,1,float16,fp8,0,0.02075359970331192
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,2,1,128,1,fp8,fp8,0,0.02045599967241287
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,2,1,128,1,float16,float16,0,0.047516798973083495
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,2,1,128,1,float16,fp8,0,0.046609601378440856
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,64,2,1,128,1,fp8,fp8,0,0.046331200003623965
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,2,2,128,1,float16,float16,0,0.04217599928379059
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,2,1,128,1,float16,float16,0,0.03149920105934143
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,2,2,128,1,fp8,fp8,0,0.04371519982814789
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,2,1,128,1,float16,fp8,0,0.032425600290298465
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,2,2,128,1,float16,fp8,0,0.04351199865341186
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,2,1,128,1,fp8,fp8,0,0.031918400526046754
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,2,2,128,1,float16,float16,0,0.029262399673461913
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,2,2,128,1,float16,fp8,0,0.0301503986120224
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,2,2,128,1,fp8,fp8,0,0.03011679947376251
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,2,1,128,1,float16,float16,0,0.025171199440956117
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,2,1,128,1,float16,fp8,0,0.025748801231384278
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,2,1,128,1,fp8,fp8,0,0.025758400559425354
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,2,2,128,1,float16,float16,0,0.0241007998585701
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,2,2,128,1,float16,fp8,0,0.02499839961528778
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,2,2,128,1,fp8,fp8,0,0.025017601251602174
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,2,1,128,1,float16,float16,0,0.0234047994017601
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,2,1,128,1,float16,fp8,0,0.0239439994096756
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,2,1,128,1,fp8,fp8,0,0.02436159998178482
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,2,2,128,1,float16,float16,0,0.022614400088787078
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,2,2,128,1,float16,fp8,0,0.02364960014820099
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,2,2,128,1,fp8,fp8,0,0.02340639978647232
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,2,1,128,1,float16,float16,0,0.021998399496078493
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,2,1,128,1,float16,fp8,0,0.023078399896621703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,2,1,128,1,float16,fp8,0,0.022390399873256684
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,2,1,128,1,fp8,fp8,0,0.02306240051984787
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,2,2,128,1,float16,float16,0,0.02176959961652756
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,2,2,128,1,float16,fp8,0,0.022510400414466857
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,2,2,128,1,fp8,fp8,0,0.022889600694179536
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,2,1,128,1,float16,float16,0,0.021998399496078493
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,2,1,128,1,fp8,fp8,0,0.02258400022983551
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,2,2,128,1,float16,float16,0,0.020478400588035583
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,2,2,128,1,float16,fp8,0,0.021054400503635405
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,2,2,128,1,fp8,fp8,0,0.021137599647045136
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,2,1,128,1,float16,float16,0,0.020503999292850496
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,2,1,128,1,float16,fp8,0,0.021451200544834136
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,2,1,128,1,fp8,fp8,0,0.021291199326515197
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,2,2,128,1,float16,float16,0,0.019424000382423402
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,2,2,128,1,float16,fp8,0,0.020372800529003143
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,2,1,128,1,float16,fp8,0,0.020766399800777435
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,2,1,128,1,fp8,fp8,0,0.0204927995800972
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,2,2,128,1,fp8,fp8,0,0.02072799950838089
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,2,1,128,1,float16,float16,0,0.019921599328517912
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,2,1,128,1,fp8,fp8,0,0.020268799364566804
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,2,2,128,1,float16,float16,0,0.019099199771881105
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,2,2,128,1,fp8,fp8,0,0.020185600221157073
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,2,2,128,1,float16,fp8,0,0.020001600682735442
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,2,1,128,1,float16,float16,0,0.0196943998336792
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,2,1,128,1,float16,fp8,0,0.020431999862194062
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,2,1,128,1,float16,float16,0,0.03707999885082245
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,2,1,128,1,float16,fp8,0,0.03878560066223145
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,32,2,1,128,1,fp8,fp8,0,0.03914720118045807
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,2,2,128,1,float16,float16,0,0.034329599142074584
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,2,2,128,1,float16,fp8,0,0.03576160073280334
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,2,2,128,1,fp8,fp8,0,0.03582560122013092
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,2,1,128,1,float16,float16,0,0.02613599896430969
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,2,1,128,1,float16,fp8,0,0.027001601457595826
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,2,1,128,1,fp8,fp8,0,0.026892799139022826
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,2,2,128,1,float16,float16,0,0.02505599856376648
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,2,2,128,1,float16,fp8,0,0.025649601221084596
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,2,2,128,1,fp8,fp8,0,0.025726398825645445
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,2,1,128,1,float16,float16,0,0.023979200422763823
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,2,1,128,1,float16,fp8,0,0.024332800507545473
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,2,1,128,1,fp8,fp8,0,0.024699200689792634
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,2,2,128,1,float16,float16,0,0.02312159985303879
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,2,2,128,1,float16,fp8,0,0.023731200397014617
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,2,2,128,1,fp8,fp8,0,0.024195200204849242
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,2,1,128,1,float16,float16,0,0.022731199860572815
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,2,1,128,1,float16,fp8,0,0.023511999845504762
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,2,1,128,1,fp8,fp8,0,0.023324799537658692
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,2,2,128,1,float16,float16,0,0.021806399524211883
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,2,2,128,1,float16,fp8,0,0.023177599906921385
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,2,2,128,1,fp8,fp8,0,0.022888000309467315
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,2,2,128,1,fp8,fp8,0,0.021352000534534454
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,2,1,128,1,float16,float16,0,0.021721599996089934
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,2,1,128,1,float16,fp8,0,0.022806400060653688
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,2,1,128,1,fp8,fp8,0,0.022864000499248506
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,2,2,128,1,float16,float16,0,0.02072480022907257
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,2,2,128,1,float16,fp8,0,0.02147040069103241
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,2,1,128,1,float16,float16,0,0.02083680033683777
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,2,1,128,1,float16,fp8,0,0.021227200329303742
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,2,1,128,1,fp8,fp8,0,0.02117439955472946
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,2,2,128,1,float16,float16,0,0.01959040015935898
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,2,2,128,1,float16,fp8,0,0.02048799991607666
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,2,2,128,1,fp8,fp8,0,0.020134399831295013
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,2,1,128,1,float16,float16,0,0.019523200392723084
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,2,1,128,1,float16,fp8,0,0.02054080069065094
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,2,1,128,1,fp8,fp8,0,0.02030719965696335
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,2,2,128,1,float16,float16,0,0.01945119947195053
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,2,2,128,1,float16,fp8,0,0.02014880031347275
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,2,2,128,1,fp8,fp8,0,0.020260800421237946
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,2,1,128,1,float16,float16,0,0.018937599658966065
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,2,1,128,1,float16,fp8,0,0.019963200390338897
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,2,1,128,1,fp8,fp8,0,0.019964799284934998
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,2,2,128,1,float16,float16,0,0.01932799965143204
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,2,2,128,1,float16,fp8,0,0.02001120001077652
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,2,2,128,1,fp8,fp8,0,0.020032000541687012
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,2,1,128,1,float16,float16,0,0.019124799966812135
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,2,1,128,1,float16,fp8,0,0.019860799610614776
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,2,1,128,1,fp8,fp8,0,0.01987359970808029
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,2,1,128,1,float16,float16,0,0.032358399033546446
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,2,1,128,1,float16,fp8,0,0.03374240100383759
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,16,2,1,128,1,fp8,fp8,0,0.03393599987030029
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,2,2,128,1,float16,float16,0,0.02985120117664337
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,2,2,128,1,float16,fp8,0,0.03136959969997406
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,2,1,128,1,float16,float16,0,0.025411200523376466
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,2,2,128,1,fp8,fp8,0,0.031195199489593504
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,2,1,128,1,float16,fp8,0,0.026169601082801818
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,2,2,128,1,float16,float16,0,0.0239439994096756
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,2,2,128,1,float16,fp8,0,0.024795199930667877
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,2,1,128,1,fp8,fp8,0,0.025935998558998107
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,2,1,128,1,float16,float16,0,0.023287999629974365
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,2,1,128,1,float16,fp8,0,0.0243136003613472
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,2,2,128,1,fp8,fp8,0,0.024929599463939668
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,2,1,128,1,fp8,fp8,0,0.024102400243282317
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,2,1,128,1,fp8,fp8,0,0.023030400276184082
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,2,2,128,1,float16,float16,0,0.022782400250434875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,2,2,128,1,float16,fp8,0,0.023366400599479677
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,2,2,128,1,fp8,fp8,0,0.02340960055589676
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,2,1,128,1,float16,float16,0,0.02260800004005432
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,2,1,128,1,float16,fp8,0,0.02313919961452484
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,2,2,128,1,float16,float16,0,0.02096319943666458
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,2,2,128,1,fp8,fp8,0,0.02162719964981079
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,2,2,128,1,float16,fp8,0,0.02176959961652756
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,2,1,128,1,float16,float16,0,0.020627200603485107
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,2,1,128,1,float16,fp8,0,0.021143999695777894
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,2,1,128,1,fp8,fp8,0,0.02133280038833618
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,2,2,128,1,float16,float16,0,0.019724799692630766
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,2,2,128,1,float16,fp8,0,0.020916800200939178
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,2,2,128,1,fp8,fp8,0,0.020580799877643587
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,2,1,128,1,float16,float16,0,0.019704000651836397
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,2,1,128,1,float16,fp8,0,0.020294399559497835
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,2,1,128,1,fp8,fp8,0,0.020596800744533537
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,2,2,128,1,float16,float16,0,0.01934559941291809
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,2,2,128,1,float16,fp8,0,0.02003519982099533
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,2,2,128,1,fp8,fp8,0,0.02022079974412918
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,2,1,128,1,float16,float16,0,0.01930239945650101
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,2,1,128,1,float16,fp8,0,0.020080000162124634
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,2,1,128,1,fp8,fp8,0,0.020095999538898467
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,2,2,128,1,float16,float16,0,0.01929599940776825
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,2,2,128,1,fp8,fp8,0,0.019952000677585603
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,2,1,128,1,fp8,fp8,0,0.0200095996260643
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,2,2,128,1,float16,fp8,0,0.020024000108242034
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,2,1,128,1,float16,float16,0,0.019206400215625762
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,2,1,128,1,float16,fp8,0,0.0200095996260643
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,2,2,128,1,float16,float16,0,0.019023999571800232
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,2,2,128,1,float16,fp8,0,0.019838400185108185
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,2,2,128,1,fp8,fp8,0,0.01982080042362213
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,2,1,128,1,float16,float16,0,0.019072000682353974
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,2,1,128,1,float16,fp8,0,0.019655999541282655
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,2,1,128,1,fp8,fp8,0,0.0200080007314682
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,2,1,128,1,float16,float16,0,0.02956640124320984
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,2,1,128,1,float16,fp8,0,0.031206399202346802
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,256,1,2,1,128,1,fp8,fp8,0,0.03125280141830444
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,2,1,128,1,fp8,fp8,0,0.023311999440193177
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,2,2,128,1,float16,float16,0,0.026607999205589296
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,2,2,128,1,float16,fp8,0,0.022065599262714387
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,2,2,128,1,float16,fp8,0,0.028385600447654723
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,2,2,128,1,fp8,fp8,0,0.02836320102214813
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,2,1,128,1,float16,float16,0,0.022856000065803527
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,2,1,128,1,float16,fp8,0,0.0236272007226944
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,2,2,128,1,float16,float16,0,0.021316799521446227
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,2,2,128,1,fp8,fp8,0,0.022047999501228332
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,2,1,128,1,float16,float16,0,0.02088959962129593
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,2,1,128,1,float16,fp8,0,0.021844799816608428
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,2,1,128,1,fp8,fp8,0,0.021745599806308746
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,2,2,128,1,float16,float16,0,0.020392000675201416
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,2,2,128,1,float16,fp8,0,0.021027199923992157
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,2,1,128,1,float16,float16,0,0.020099200308322906
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,2,2,128,1,fp8,fp8,0,0.021147200465202333
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,2,1,128,1,float16,fp8,0,0.02067520022392273
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,2,1,128,1,fp8,fp8,0,0.021078400313854218
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,2,2,128,1,float16,float16,0,0.019683200120925903
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,2,2,128,1,float16,fp8,0,0.020494399964809416
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,2,2,128,1,fp8,fp8,0,0.020287999510765077
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,2,1,128,1,float16,fp8,0,0.02011840045452118
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,2,1,128,1,float16,float16,0,0.019475199282169342
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,2,1,128,1,fp8,fp8,0,0.02011680006980896
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,2,2,128,1,float16,float16,0,0.01920959949493408
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,2,2,128,1,float16,fp8,0,0.020156799256801604
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,2,2,128,1,fp8,fp8,0,0.020102399587631225
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,2,1,128,1,float16,float16,0,0.019281600415706635
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,2,1,128,1,float16,fp8,0,0.019995200634002685
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,2,1,128,1,fp8,fp8,0,0.019947199523448943
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,2,2,128,1,float16,float16,0,0.01902559995651245
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,2,2,128,1,float16,fp8,0,0.019908800721168518
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,2,2,128,1,fp8,fp8,0,0.019566400349140166
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,2,1,128,1,float16,float16,0,0.01902880072593689
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,2,1,128,1,float16,fp8,0,0.019487999379634857
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,2,2,128,1,float16,float16,0,0.018910400569438934
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,2,2,128,1,fp8,fp8,0,0.019968000054359437
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,2,1,128,1,fp8,fp8,0,0.01987999975681305
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,2,2,128,1,float16,fp8,0,0.019809600710868836
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,2,1,128,1,float16,float16,0,0.017054399847984313
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,2,1,128,1,float16,fp8,0,0.017894400656223296
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,2,1,128,1,fp8,fp8,0,0.01807360053062439
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,2,2,128,1,float16,float16,0,0.01700640022754669
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,2,2,128,1,float16,fp8,0,0.01780640035867691
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,2,2,128,1,fp8,fp8,0,0.01812800019979477
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,2,1,128,1,float16,float16,0,0.016150400042533875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,2,1,128,1,float16,fp8,0,0.01716800034046173
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,2,1,128,1,fp8,fp8,0,0.017159999907016756
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,1,1,128,1,float16,fp8,0,0.5008959770202637
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,1,1,128,1,float16,float16,0,0.26867198944091797
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,1,1,128,1,fp8,fp8,0,0.49930877685546876
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,1,1,128,1,float16,fp8,0,0.2548176050186157
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16384,1,1,128,1,float16,float16,0,0.5196800231933594
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16384,1,1,128,1,fp8,fp8,0,0.25653600692749023
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,1,1,128,1,float16,float16,0,0.23743200302124023
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,1,1,128,1,float16,fp8,0,0.23863039016723633
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16384,1,1,128,1,fp8,fp8,0,0.2338495969772339
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,1,1,128,1,float16,float16,0,0.37260799407958983
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,1,1,128,1,float16,fp8,0,0.35746240615844727
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,1,1,128,1,float16,float16,0,0.20647358894348145
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,1,1,128,1,float16,fp8,0,0.2006592035293579
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,12288,1,1,128,1,fp8,fp8,0,0.362059211730957
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,12288,1,1,128,1,fp8,fp8,0,0.19938559532165528
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,1,1,128,1,float16,float16,0,0.12085440158843994
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,1,1,128,1,float16,fp8,0,0.11822079420089722
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,12288,1,1,128,1,fp8,fp8,0,0.11973439455032349
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,1,1,128,1,float16,float16,0,0.2970400094985962
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,1,1,128,1,float16,fp8,0,0.28464159965515134
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,10240,1,1,128,1,fp8,fp8,0,0.28596479892730714
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,1,1,128,1,float16,float16,0,0.17537440061569215
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,1,1,128,1,float16,fp8,0,0.16881439685821534
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,10240,1,1,128,1,fp8,fp8,0,0.169159996509552
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,1,1,128,1,float16,float16,0,0.10255839824676513
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,1,1,128,1,fp8,fp8,0,0.3567375898361206
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,1,1,128,1,float16,fp8,0,0.10109920501708984
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,10240,1,1,128,1,fp8,fp8,0,0.10057920217514038
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,1,1,128,1,float16,float16,0,0.3662415981292725
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,8192,1,1,128,1,float16,fp8,0,0.35795040130615235
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,1,1,128,1,float16,float16,0,0.2218400001525879
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,1,1,128,1,float16,fp8,0,0.20941760540008544
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,8192,1,1,128,1,fp8,fp8,0,0.20806241035461426
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,1,1,128,1,float16,float16,0,0.14023840427398682
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,1,1,128,1,float16,fp8,0,0.1390560030937195
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,8192,1,1,128,1,fp8,fp8,0,0.13914400339126587
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,1,1,128,1,float16,float16,0,0.08621119856834411
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,1,1,128,1,float16,fp8,0,0.08596799969673156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,8192,1,1,128,1,fp8,fp8,0,0.0866320013999939
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,1,1,128,1,float16,float16,0,0.24738879203796388
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,1,1,128,1,float16,fp8,0,0.23837120532989503
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,6144,1,1,128,1,fp8,fp8,0,0.23820641040802001
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,1,1,128,1,float16,float16,0,0.14250080585479735
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,1,1,128,1,float16,fp8,0,0.1365504026412964
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,1,1,128,1,float16,float16,0,0.08951039910316468
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,1,1,128,1,float16,fp8,0,0.08850240111351013
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,1,1,128,1,fp8,fp8,0,0.057492798566818236
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,6144,1,1,128,1,fp8,fp8,0,0.08914560079574585
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,1,1,128,1,float16,float16,0,0.056806397438049314
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,6144,1,1,128,1,fp8,fp8,0,0.1368175983428955
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,6144,1,1,128,1,float16,fp8,0,0.057025599479675296
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,1,1,128,1,float16,float16,0,0.24904320240020753
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,1,1,128,1,float16,fp8,0,0.24226078987121583
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,4096,1,1,128,1,fp8,fp8,0,0.24226560592651367
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,1,1,128,1,float16,float16,0,0.1512287974357605
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,1,1,128,1,float16,fp8,0,0.13884320259094238
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,4096,1,1,128,1,fp8,fp8,0,0.13984320163726807
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,1,1,128,1,float16,float16,0,0.08867359757423401
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,1,1,128,1,float16,float16,0,0.06003999710083008
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,1,1,128,1,float16,fp8,0,0.06027039885520935
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,1,1,128,1,float16,fp8,0,0.08878399729728699
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,4096,1,1,128,1,fp8,fp8,0,0.08818240165710449
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,4096,1,1,128,1,fp8,fp8,0,0.06028159856796265
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,1,1,128,1,fp8,fp8,0,0.04173760116100311
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,1,1,128,1,float16,float16,0,0.040694400668144226
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,4096,1,1,128,1,float16,fp8,0,0.041771200299263
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,1,1,128,1,float16,float16,0,0.17539360523223876
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,1,1,128,1,float16,float16,0,0.10447520017623901
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,1,1,128,1,float16,fp8,0,0.09978880286216736
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,1,1,128,1,float16,fp8,0,0.16821119785308838
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,1,1,128,1,float16,float16,0,0.07202879786491394
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,3072,1,1,128,1,fp8,fp8,0,0.1693727970123291
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,3072,1,1,128,1,fp8,fp8,0,0.1010208010673523
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,1,1,128,1,float16,fp8,0,0.07054240107536316
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,3072,1,1,128,1,fp8,fp8,0,0.07134079933166504
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,1,1,128,1,float16,float16,0,0.044593599438667295
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,1,1,128,1,float16,fp8,0,0.04529280066490173
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,1,1,128,1,float16,float16,0,0.034771201014518735
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,3072,1,1,128,1,fp8,fp8,0,0.04561760127544403
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,1,1,128,1,float16,fp8,0,0.03538079857826233
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,3072,1,1,128,1,fp8,fp8,0,0.035489600896835324
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,1,1,128,1,float16,fp8,0,0.1816831946372986
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,1,1,128,1,float16,float16,0,0.18536640405654908
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,2048,1,1,128,1,fp8,fp8,0,0.1820736050605774
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,1,1,128,1,float16,float16,0,0.11164959669113159
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,1,1,128,1,float16,fp8,0,0.10401439666748047
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,2048,1,1,128,1,fp8,fp8,0,0.1026960015296936
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,1,1,128,1,float16,float16,0,0.06369919776916504
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,1,1,128,1,float16,fp8,0,0.0630944013595581
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,1,1,128,1,float16,float16,0,0.04747520089149475
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,2048,1,1,128,1,fp8,fp8,0,0.06292480230331421
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,1,1,128,1,fp8,fp8,0,0.0484688013792038
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,1,1,128,1,float16,fp8,0,0.03604960143566131
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,2048,1,1,128,1,float16,fp8,0,0.048528000712394714
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,1,1,128,1,float16,float16,0,0.03394240140914917
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,2048,1,1,128,1,fp8,fp8,0,0.03636800050735474
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,1,1,128,1,float16,float16,0,0.030406400561332703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,1,1,128,1,float16,fp8,0,0.03089120090007782
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,2048,1,1,128,1,fp8,fp8,0,0.031079998612403868
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,1,1,128,1,float16,float16,0,0.13584959506988525
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,1,1,128,1,float16,fp8,0,0.13197120428085327
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1536,1,1,128,1,fp8,fp8,0,0.1320304036140442
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,1,1,128,1,float16,float16,0,0.07996799945831298
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,1,1,128,1,float16,fp8,0,0.07677119970321655
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,1,1,128,1,float16,float16,0,0.05960000157356262
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1536,1,1,128,1,fp8,fp8,0,0.07662720084190369
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,1,1,128,1,float16,fp8,0,0.06227999925613403
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,1,1,128,1,float16,float16,0,0.039585599303245546
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,1,1,128,1,fp8,fp8,0,0.039857599139213565
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1536,1,1,128,1,fp8,fp8,0,0.06027519702911377
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,1,1,128,1,float16,float16,0,0.03131040036678314
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,1,1,128,1,float16,fp8,0,0.03129119873046875
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1536,1,1,128,1,fp8,fp8,0,0.031646400690078735
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1536,1,1,128,1,float16,fp8,0,0.039864000678062436
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,1,1,128,1,float16,float16,0,0.028969600796699524
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,1,1,128,1,float16,fp8,0,0.030144000053405763
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1536,1,1,128,1,fp8,fp8,0,0.03071039915084839
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,1,1,128,1,float16,float16,0,0.15322240591049194
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,1,1,128,1,float16,fp8,0,0.15447360277175903
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1024,1,1,128,1,fp8,fp8,0,0.15437439680099488
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,1,1,128,1,fp8,fp8,0,0.05119839906692505
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,1,1,128,1,float16,float16,0,0.09204800128936767
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,1,1,128,1,float16,fp8,0,0.08564159870147706
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1024,1,1,128,1,fp8,fp8,0,0.08669440150260925
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,1,1,128,1,float16,float16,0,0.05087680220603943
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1024,1,1,128,1,float16,fp8,0,0.05143200159072876
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,1,1,128,1,float16,float16,0,0.04132159948348999
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,1,1,128,1,float16,fp8,0,0.042822399735450746
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1024,1,1,128,1,fp8,fp8,0,0.04286240041255951
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,1,1,128,1,float16,float16,0,0.032425600290298465
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,1,1,128,1,float16,fp8,0,0.032902398705482484
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1024,1,1,128,1,fp8,fp8,0,0.03247840106487274
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,1,1,128,1,float16,fp8,0,0.02909280061721802
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,1,1,128,1,float16,float16,0,0.02875039875507355
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,1,1,128,1,float16,fp8,0,0.03033280074596405
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1024,1,1,128,1,fp8,fp8,0,0.029931199550628663
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,1,1,128,1,float16,float16,0,0.027806401252746582
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1024,1,1,128,1,fp8,fp8,0,0.029371199011802674
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,1,1,128,1,float16,float16,0,0.130513596534729
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,1,1,128,1,float16,fp8,0,0.13040640354156494
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,512,1,1,128,1,fp8,fp8,0,0.13131040334701538
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,1,1,128,1,float16,float16,0,0.07672320008277893
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,1,1,128,1,float16,fp8,0,0.07062399983406067
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,512,1,1,128,1,fp8,fp8,0,0.06987839937210083
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,1,1,128,1,float16,float16,0,0.04220159947872162
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,1,1,128,1,float16,fp8,0,0.0432559996843338
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,512,1,1,128,1,fp8,fp8,0,0.04315840005874634
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,1,1,128,1,float16,float16,0,0.0332399994134903
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,1,1,128,1,float16,fp8,0,0.035158398747444156
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,512,1,1,128,1,fp8,fp8,0,0.03500800132751465
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,1,1,128,1,float16,float16,0,0.028479999303817748
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,1,1,128,1,float16,fp8,0,0.03068479895591736
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,512,1,1,128,1,fp8,fp8,0,0.030532801151275636
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,1,1,128,1,float16,float16,0,0.02693760097026825
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,1,1,128,1,float16,fp8,0,0.02911840081214905
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,1,1,128,1,float16,fp8,0,0.028911998867988585
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,512,1,1,128,1,fp8,fp8,0,0.029300799965858458
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,1,1,128,1,float16,float16,0,0.02640959918498993
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,1,1,128,1,float16,fp8,0,0.02865920066833496
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,512,1,1,128,1,fp8,fp8,0,0.028547200560569762
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,1,1,128,1,float16,float16,0,0.026161599159240722
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,512,1,1,128,1,fp8,fp8,0,0.02868799865245819
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,1,1,128,1,float16,float16,0,0.07257279753684998
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,1,1,128,1,float16,fp8,0,0.06538239717483521
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,256,1,1,128,1,fp8,fp8,0,0.06594399809837341
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,1,1,128,1,float16,float16,0,0.039587199687957764
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,1,1,128,1,float16,fp8,0,0.04036319851875305
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,256,1,1,128,1,fp8,fp8,0,0.04026080071926117
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,1,1,128,1,float16,float16,0,0.030663999915122985
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,1,1,128,1,float16,fp8,0,0.03187040090560913
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,256,1,1,128,1,fp8,fp8,0,0.03206880092620849
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,1,1,128,1,float16,float16,0,0.02547360062599182
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,1,1,128,1,float16,fp8,0,0.02754240036010742
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,256,1,1,128,1,fp8,fp8,0,0.0267984002828598
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,1,1,128,1,float16,float16,0,0.02396800071001053
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,1,1,128,1,float16,fp8,0,0.02518239915370941
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,256,1,1,128,1,fp8,fp8,0,0.025209599733352663
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,1,1,128,1,float16,float16,0,0.02344159930944443
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,1,1,128,1,float16,fp8,0,0.02467840015888214
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,256,1,1,128,1,fp8,fp8,0,0.024747200310230255
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,1,1,128,1,float16,float16,0,0.02333440035581589
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,1,1,128,1,float16,fp8,0,0.024512000381946564
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,256,1,1,128,1,fp8,fp8,0,0.02446720004081726
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,1,1,128,1,float16,float16,0,0.021782399713993074
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,1,1,128,1,float16,fp8,0,0.02336000055074692
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,256,1,1,128,1,fp8,fp8,0,0.023056000471115112
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,1,1,128,1,float16,float16,0,0.039529600739479066
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,1,1,128,1,float16,fp8,0,0.03947679996490479
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,128,1,1,128,1,fp8,fp8,0,0.039750400185585025
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,1,1,128,1,float16,float16,0,0.029716798663139345
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,1,1,128,1,float16,fp8,0,0.03017280101776123
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,128,1,1,128,1,fp8,fp8,0,0.030291199684143066
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,1,1,128,1,float16,float16,0,0.02465600073337555
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,1,1,128,1,float16,fp8,0,0.025164800882339477
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,128,1,1,128,1,fp8,fp8,0,0.02518880069255829
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,1,1,128,1,float16,float16,0,0.02265920042991638
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,1,1,128,1,float16,fp8,0,0.02335519939661026
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,128,1,1,128,1,fp8,fp8,0,0.023233599960803986
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,1,1,128,1,float16,float16,0,0.02175839990377426
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,1,1,128,1,float16,fp8,0,0.022401599586009978
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,128,1,1,128,1,fp8,fp8,0,0.02265920042991638
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,1,1,128,1,float16,float16,0,0.021524800360202788
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,1,1,128,1,float16,fp8,0,0.022492800652980805
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,128,1,1,128,1,fp8,fp8,0,0.022574399411678315
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,1,1,128,1,float16,float16,0,0.020193600654602052
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,1,1,128,1,float16,fp8,0,0.021169599890708924
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,128,1,1,128,1,fp8,fp8,0,0.02128159999847412
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,1,1,128,1,float16,float16,0,0.019841599464416503
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,1,1,128,1,float16,fp8,0,0.020662400126457214
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,128,1,1,128,1,fp8,fp8,0,0.020596800744533537
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,1,1,128,1,float16,float16,0,0.030636799335479737
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,1,1,128,1,float16,fp8,0,0.03147520124912262
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,64,1,1,128,1,fp8,fp8,0,0.03172479867935181
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,1,1,128,1,float16,float16,0,0.024489599466323852
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,1,1,128,1,float16,fp8,0,0.02560960054397583
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,64,1,1,128,1,fp8,fp8,0,0.025651198625564576
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,1,1,128,1,float16,float16,0,0.023171199858188628
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,1,1,128,1,float16,fp8,0,0.023937599360942842
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,64,1,1,128,1,fp8,fp8,0,0.02372799962759018
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,1,1,128,1,float16,float16,0,0.022015999257564544
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,1,1,128,1,float16,fp8,0,0.022763200104236603
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,64,1,1,128,1,fp8,fp8,0,0.022392000257968902
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,1,1,128,1,float16,float16,0,0.021275199949741364
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,1,1,128,1,float16,fp8,0,0.022140799462795256
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,64,1,1,128,1,fp8,fp8,0,0.022265599668025972
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,1,1,128,1,float16,float16,0,0.02030719965696335
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,1,1,128,1,float16,fp8,0,0.021167999505996703
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,64,1,1,128,1,fp8,fp8,0,0.021164800226688384
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,1,1,128,1,float16,float16,0,0.019211199879646302
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,1,1,128,1,float16,fp8,0,0.02032800018787384
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,64,1,1,128,1,fp8,fp8,0,0.02035039961338043
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,1,1,128,1,float16,float16,0,0.01927199959754944
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,1,1,128,1,float16,fp8,0,0.020193600654602052
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,64,1,1,128,1,fp8,fp8,0,0.019952000677585603
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,1,1,128,1,float16,float16,0,0.02606239914894104
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,1,1,128,1,float16,fp8,0,0.02687999904155731
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,32,1,1,128,1,fp8,fp8,0,0.026840001344680786
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,1,1,128,1,float16,float16,0,0.023524799942970277
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,1,1,128,1,float16,fp8,0,0.024427199363708497
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,32,1,1,128,1,fp8,fp8,0,0.024480000138282776
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,1,1,128,1,float16,float16,0,0.022492800652980805
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,1,1,128,1,float16,fp8,0,0.023603199422359465
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,32,1,1,128,1,fp8,fp8,0,0.023577600717544556
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,1,1,128,1,float16,float16,0,0.021748800575733186
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,1,1,128,1,float16,fp8,0,0.02245279997587204
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,32,1,1,128,1,fp8,fp8,0,0.022336000204086305
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,1,1,128,1,float16,float16,0,0.020534400641918183
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,1,1,128,1,float16,fp8,0,0.021223999559879303
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,32,1,1,128,1,fp8,fp8,0,0.02117599993944168
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,1,1,128,1,float16,float16,0,0.019415999948978423
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,1,1,128,1,float16,fp8,0,0.020329600572586058
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,32,1,1,128,1,fp8,fp8,0,0.02051839977502823
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,1,1,128,1,float16,float16,0,0.01924159973859787
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,1,1,128,1,float16,fp8,0,0.020105600357055664
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,32,1,1,128,1,fp8,fp8,0,0.019974400103092194
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,1,1,128,1,float16,float16,0,0.01892320066690445
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,1,1,128,1,float16,fp8,0,0.019699199497699736
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,32,1,1,128,1,fp8,fp8,0,0.019915199279785155
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,1,1,128,1,float16,float16,0,0.025308799743652344
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,1,1,128,1,float16,fp8,0,0.02603999972343445
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,16,1,1,128,1,fp8,fp8,0,0.026344001293182373
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,1,1,128,1,float16,float16,0,0.0234592005610466
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,1,1,128,1,float16,fp8,0,0.02417760044336319
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,16,1,1,128,1,fp8,fp8,0,0.024171200394630433
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,1,1,128,1,float16,float16,0,0.0223471999168396
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,1,1,128,1,float16,fp8,0,0.023150399327278137
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,16,1,1,128,1,fp8,fp8,0,0.022735999524593355
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,1,1,128,1,float16,float16,0,0.020812800526618956
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,1,1,128,1,float16,fp8,0,0.021222400665283202
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,16,1,1,128,1,fp8,fp8,0,0.021539199352264404
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,1,1,128,1,fp8,fp8,0,0.02022559940814972
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,1,1,128,1,float16,float16,0,0.01942239999771118
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,1,1,128,1,float16,fp8,0,0.020475199818611144
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,16,1,1,128,1,fp8,fp8,0,0.02030239999294281
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,1,1,128,1,float16,float16,0,0.019313600659370423
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,16,1,1,128,1,float16,fp8,0,0.02014400064945221
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,1,1,128,1,float16,float16,0,0.01895360052585602
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,1,1,128,1,float16,fp8,0,0.019972799718379973
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,16,1,1,128,1,fp8,fp8,0,0.019832000136375427
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,1,1,128,1,float16,float16,0,0.018822400271892546
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,1,1,128,1,float16,fp8,0,0.0195375993847847
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,16,1,1,128,1,fp8,fp8,0,0.019801600277423857
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,1,1,128,1,float16,float16,0,0.022852799296379088
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,1,1,128,1,float16,fp8,0,0.02346239984035492
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,128,1,1,1,128,1,fp8,fp8,0,0.023388800024986268
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,1,1,128,1,float16,float16,0,0.020851199328899384
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,1,1,128,1,float16,fp8,0,0.02205280065536499
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,64,1,1,1,128,1,fp8,fp8,0,0.021809600293636322
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,1,1,128,1,float16,float16,0,0.019849599897861482
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,1,1,128,1,float16,fp8,0,0.020688000321388244
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,32,1,1,1,128,1,fp8,fp8,0,0.020820799469947814
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,1,1,128,1,float16,float16,0,0.01934240013360977
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,1,1,128,1,float16,fp8,0,0.0199535995721817
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,16,1,1,1,128,1,fp8,fp8,0,0.02043360024690628
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,1,1,128,1,float16,float16,0,0.0191648006439209
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,1,1,128,1,float16,fp8,0,0.019870400428771973
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,8,1,1,1,128,1,fp8,fp8,0,0.020164799690246583
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,1,1,128,1,float16,float16,0,0.018908800184726716
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,1,1,128,1,float16,fp8,0,0.01966080069541931
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,4,1,1,1,128,1,fp8,fp8,0,0.019755199551582336
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,1,1,128,1,float16,float16,0,0.017000000178813934
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,1,1,128,1,float16,fp8,0,0.01814880073070526
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,2,1,1,1,128,1,fp8,fp8,0,0.017895999550819396
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,1,1,128,1,float16,float16,0,0.016096000373363496
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,1,1,128,1,float16,fp8,0,0.01706240028142929
SGLang,0.5.6.post2,NVIDIA H200,context_attention,flash_attention,1,1,1,1,128,1,fp8,fp8,0,0.01727519929409027
