framework,version,device,op_name,kernel_source,batch_size,isl,num_heads,num_key_value_heads,head_dim,beam_width,attn_dtype,kv_cache_dtype,step,latency
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,64,128,1,float16,float16,1,0.13703199625015258
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,64,128,1,float16,fp8,1,0.16503360271453857
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,64,128,1,float16,float16,3,0.13685760498046876
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,64,128,1,float16,fp8,3,0.16518080234527588
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,64,128,1,float16,float16,7,0.13684480190277098
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,64,128,1,float16,fp8,7,0.16514719724655152
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,64,128,1,float16,float16,15,0.1374384045600891
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,64,128,1,float16,fp8,15,0.1653280019760132
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,64,128,1,float16,float16,31,0.14143999814987182
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,64,128,1,float16,fp8,31,0.1657871961593628
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,64,128,1,float16,float16,63,0.14612480401992797
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,64,128,1,float16,fp8,63,0.17609119415283203
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,64,128,1,float16,float16,127,0.15050079822540283
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,64,128,1,float16,fp8,127,0.18046879768371582
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,64,128,1,float16,float16,255,0.20679519176483155
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,64,128,1,float16,fp8,255,0.23381600379943848
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,64,128,1,float16,float16,511,0.3182384014129639
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,64,128,1,float16,fp8,511,0.3532383918762207
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,64,128,1,float16,float16,1023,0.5712992191314697
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,64,128,1,float16,fp8,1023,0.5427536010742188
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,64,128,1,float16,float16,2047,1.0775055885314941
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,64,128,1,float16,fp8,2047,0.9757391929626464
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,64,128,1,float16,float16,1,0.02046400010585785
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,64,128,1,float16,fp8,1,0.020707200467586517
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,64,128,1,float16,float16,3,0.020102399587631225
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,64,128,1,float16,fp8,3,0.020776000618934632
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,64,128,1,float16,float16,7,0.019926400482654573
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,64,128,1,float16,fp8,7,0.02080480009317398
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,64,128,1,float16,float16,15,0.019843199849128725
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,64,128,1,float16,fp8,15,0.02056960016489029
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,64,128,1,float16,float16,31,0.019993600249290467
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,64,128,1,float16,fp8,31,0.020529599487781526
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,64,128,1,float16,float16,63,0.019782400131225585
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,64,128,1,float16,fp8,63,0.020572799444198608
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,64,128,1,float16,float16,127,0.0200095996260643
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,64,128,1,float16,fp8,127,0.020468799769878386
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,64,128,1,float16,float16,255,0.021217599511146545
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,64,128,1,float16,fp8,255,0.02250239998102188
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,64,128,1,float16,float16,511,0.024345600605010988
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,64,128,1,float16,fp8,511,0.026015999913215637
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,64,128,1,float16,float16,1023,0.02837440073490143
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,64,128,1,float16,fp8,1023,0.02933279871940613
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,64,128,1,float16,float16,2047,0.03961600065231323
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,64,128,1,float16,fp8,2047,0.035304000973701476
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,64,128,1,float16,float16,1,0.019923199713230134
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,64,128,1,float16,fp8,1,0.02084160000085831
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,64,128,1,float16,float16,3,0.02001439929008484
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,64,128,1,float16,fp8,3,0.020768000185489653
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,64,128,1,float16,float16,7,0.019897599518299103
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,64,128,1,float16,fp8,7,0.020713600516319274
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,64,128,1,float16,float16,15,0.019923199713230134
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,64,128,1,float16,float16,31,0.020032000541687012
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,64,128,1,float16,fp8,31,0.020894399285316466
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,64,128,1,float16,float16,127,0.0203247994184494
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,64,128,1,float16,float16,511,0.025094398856163026
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,64,128,1,float16,float16,1023,0.03935199975967407
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,64,128,1,float16,fp8,1023,0.03463039994239807
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,64,128,1,float16,float16,2047,0.054683202505111696
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,64,128,1,float16,fp8,2047,0.05394880175590515
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,64,128,1,float16,float16,1,0.07784159779548645
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,64,128,1,float16,fp8,1,0.09056959748268127
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,64,128,1,float16,float16,3,0.07763199806213379
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,64,128,1,float16,fp8,3,0.09040639996528625
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,64,128,1,float16,float16,7,0.07783520221710205
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,64,128,1,float16,fp8,7,0.09051679968833923
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,64,128,1,float16,float16,15,0.07810239791870117
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,64,128,1,float16,fp8,15,0.09074079990386963
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,64,128,1,float16,float16,31,0.07809119820594787
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,64,128,1,float16,fp8,31,0.09087839722633362
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,64,128,1,float16,fp8,15,0.020798400044441223
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,64,128,1,float16,float16,63,0.08431839942932129
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,64,128,1,float16,fp8,63,0.02117599993944168
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,64,128,1,float16,fp8,63,0.09163359999656677
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,64,128,1,float16,float16,63,0.02029920071363449
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,64,128,1,float16,float16,127,0.08716800212860107
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,64,128,1,float16,fp8,127,0.10052640438079834
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,64,128,1,float16,fp8,127,0.021303999423980712
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,64,128,1,float16,float16,255,0.11522239446640015
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,64,128,1,float16,float16,255,0.022091199457645417
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,64,128,1,float16,fp8,255,0.12945280075073243
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,64,128,1,float16,float16,511,0.17405600547790528
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,64,128,1,float16,fp8,511,0.19007840156555175
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,64,128,1,float16,fp8,255,0.02330400049686432
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,64,128,1,float16,fp8,511,0.0272271990776062
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,64,128,1,float16,float16,1023,0.3006688117980957
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,64,128,1,float16,fp8,1023,0.2866847991943359
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,64,128,1,float16,float16,1,0.02374880015850067
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,64,128,1,float16,fp8,1,0.025672000646591187
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,64,128,1,float16,float16,2047,0.5570176124572754
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,64,128,1,float16,float16,3,0.02370080053806305
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,64,128,1,float16,fp8,3,0.02555519938468933
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,64,128,1,float16,fp8,2047,0.47628321647644045
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,64,128,1,float16,float16,7,0.024063999950885772
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,64,128,1,float16,fp8,7,0.0254831999540329
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,64,128,1,float16,fp8,63,0.025814399123191833
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,64,128,1,float16,float16,15,0.02388159930706024
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,64,128,1,float16,float16,255,0.028116801381111146
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,64,128,1,float16,fp8,15,0.02553279995918274
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,64,128,1,float16,float16,31,0.024131199717521666
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,64,128,1,float16,fp8,31,0.025649601221084596
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,64,128,1,float16,float16,63,0.02430880069732666
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,64,128,1,float16,float16,127,0.024403199553489685
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,64,128,1,float16,fp8,127,0.025699201226234435
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,64,128,1,float16,fp8,255,0.029713600873947144
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,64,128,1,float16,float16,511,0.04000959992408752
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,64,128,1,float16,fp8,511,0.03829280138015747
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,64,128,1,float16,float16,1023,0.0578495979309082
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,64,128,1,float16,fp8,1023,0.05859360098838806
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,64,128,1,float16,float16,2047,0.0895247995853424
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,64,128,1,float16,fp8,2047,0.08258399963378907
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,64,128,1,float16,float16,1,0.25358879566192627
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,64,128,1,float16,fp8,1,0.31247360706329347
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,64,128,1,float16,float16,3,0.2538608074188232
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,64,128,1,float16,float16,7,0.2541759967803955
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,64,128,1,float16,fp8,3,0.31142559051513674
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,64,128,1,float16,fp8,7,0.31352479457855226
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,64,128,1,float16,float16,15,0.2574160099029541
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,64,128,1,float16,fp8,15,0.3111680030822754
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,64,128,1,float16,float16,31,0.2688911914825439
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,64,128,1,float16,fp8,31,0.3210319995880127
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,64,128,1,float16,float16,63,0.27213120460510254
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,64,128,1,float16,fp8,63,0.3301647901535034
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,64,128,1,float16,float16,127,0.27674880027771
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,64,128,1,float16,fp8,127,0.33894240856170654
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,64,128,1,float16,float16,255,0.389247989654541
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,64,128,1,float16,fp8,255,0.4448575973510742
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,64,128,1,float16,float16,511,0.6246592044830322
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,64,128,1,float16,float16,1,0.48611998558044434
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,64,128,1,float16,fp8,511,0.6725728034973144
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,64,128,1,float16,fp8,1,0.6049744129180908
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,64,128,1,float16,float16,3,0.487497615814209
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,64,128,1,float16,float16,1023,1.1334511756896972
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,64,128,1,float16,float16,7,0.491487979888916
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,64,128,1,float16,fp8,3,0.6094399929046631
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,64,128,1,float16,fp8,1023,1.0449024200439454
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,64,128,1,float16,float16,15,0.5103280067443847
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,64,128,1,float16,fp8,7,0.6075583934783936
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,64,128,1,float16,fp8,15,0.6079823970794678
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,64,128,1,float16,float16,31,0.5798751831054687
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,64,128,1,float16,fp8,31,0.6396207809448242
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,64,128,1,float16,fp8,63,0.6408912181854248
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,64,128,1,float16,float16,63,0.5201087951660156
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,64,128,1,float16,float16,127,0.5329455852508544
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,64,128,1,float16,fp8,127,0.6483503818511963
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,64,128,1,float16,float16,255,0.7568448066711426
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,64,128,1,float16,float16,1,0.9924320220947266
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,64,128,1,float16,fp8,255,0.8827216148376464
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,64,128,1,float16,fp8,1,1.1863375663757325
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,64,128,1,float16,float16,3,0.985529613494873
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,64,128,1,float16,fp8,3,1.1615856170654297
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,64,128,1,float16,float16,7,0.9978128433227539
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,64,128,1,float16,fp8,7,1.1709199905395509
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,64,128,1,float16,float16,15,1.0027968406677246
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,64,128,1,float16,fp8,15,1.2479920387268066
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,64,128,1,float16,float16,31,1.0059391975402832
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,64,128,1,float16,fp8,31,1.2567055702209473
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,64,128,1,float16,float16,1,0.031972798705101016
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,64,128,1,float16,float16,63,1.0047792434692382
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,64,128,1,float16,fp8,1,0.03531680107116699
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,64,128,1,float16,float16,3,0.03224959969520569
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,64,128,1,float16,float16,15,0.032307198643684386
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,64,128,1,float16,fp8,63,1.253486442565918
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,64,128,1,float16,fp8,3,0.03499679863452911
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,64,128,1,float16,float16,7,0.03200640082359314
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,64,128,1,float16,float16,127,1.0400752067565917
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,64,128,1,float16,fp8,7,0.035283198952674864
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,64,128,1,float16,fp8,127,1.277064037322998
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,64,128,1,float16,fp8,15,0.03524320125579834
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,64,128,1,float16,float16,31,0.0322735995054245
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,64,128,1,float16,fp8,31,0.035308799147605895
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,64,128,1,float16,float16,63,0.032425600290298465
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,64,128,1,float16,fp8,63,0.035364800691604616
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,64,128,1,float16,float16,127,0.033374398946762085
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,64,128,1,float16,fp8,127,0.03527520000934601
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,64,128,1,float16,float16,255,0.047681599855422974
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,64,128,1,float16,fp8,255,0.04364959895610809
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,64,128,1,float16,float16,511,0.05985919833183288
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,64,128,1,float16,fp8,511,0.06445599794387817
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,64,128,1,float16,float16,1023,0.09550399780273437
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,64,128,1,float16,fp8,1023,0.09187039732933044
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,64,128,1,float16,float16,2047,0.15559359788894653
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,64,128,1,float16,fp8,2047,0.13922079801559448
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,64,128,1,float16,float16,1,1.9704864501953125
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,64,128,1,float16,float16,3,1.9721343994140625
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,64,128,1,float16,fp8,1,2.4884927749633787
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,64,128,1,float16,float16,7,1.9698575973510741
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,64,128,1,float16,fp8,3,2.481603240966797
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,64,128,1,float16,float16,15,1.9686304092407227
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,64,128,1,float16,fp8,7,2.4786319732666016
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,64,128,1,float16,fp8,15,2.461678314208984
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,64,128,1,float16,float16,31,1.9733455657958985
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,64,128,1,float16,float16,63,2.0144016265869142
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,64,128,1,float16,fp8,31,2.4767536163330077
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,64,128,1,float16,fp8,63,2.467288017272949
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,64,128,1,float16,float16,1,3.926675033569336
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,64,128,1,float16,float16,3,3.921142578125
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,64,128,1,float16,fp8,1,4.922622299194336
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,64,128,1,float16,float16,7,3.906441497802734
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,64,128,1,float16,float16,1,0.046998399496078494
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,64,128,1,float16,fp8,3,4.897918319702148
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,64,128,1,float16,fp8,1,0.05355200171470642
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,64,128,1,float16,float16,3,0.047126400470733645
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,64,128,1,float16,float16,15,3.9174671173095703
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,64,128,1,float16,fp8,3,0.053472000360488894
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,64,128,1,float16,float16,7,0.047188800573348996
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,64,128,1,float16,fp8,7,0.053844797611236575
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,64,128,1,float16,fp8,7,4.9196430206298825
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,64,128,1,float16,float16,15,0.04702720046043396
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,64,128,1,float16,fp8,15,0.05347520112991333
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,64,128,1,float16,float16,31,0.047838398814201356
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,64,128,1,float16,fp8,31,0.053744000196456906
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,64,128,1,float16,float16,63,0.048187199234962466
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,64,128,1,float16,fp8,15,4.9306385040283205
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,64,128,1,float16,fp8,63,0.05333120226860046
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,64,128,1,float16,float16,127,0.05462880134582519
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,64,128,1,float16,float16,31,3.893657684326172
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,64,128,1,float16,fp8,127,0.05472639799118042
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,64,128,1,float16,float16,255,0.07036479711532592
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,64,128,1,float16,fp8,255,0.07569599747657776
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,64,128,1,float16,float16,511,0.10014560222625732
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,64,128,1,float16,float16,1023,0.16888959407806398
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,64,128,1,float16,fp8,511,0.1062608003616333
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,64,128,1,float16,fp8,31,4.901771163940429
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,64,128,1,float16,fp8,1023,0.15708320140838622
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,48,48,128,1,float16,float16,1,0.10741759538650512
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,64,128,1,float16,float16,2047,0.28478078842163085
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,64,128,1,float16,fp8,2047,0.2502624034881592
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,48,48,128,1,float16,float16,3,0.10768320560455322
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,48,48,128,1,float16,fp8,1,0.12836320400238038
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,48,48,128,1,float16,fp8,3,0.12754240036010742
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,48,48,128,1,float16,float16,7,0.10787999629974365
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,48,48,128,1,float16,fp8,7,0.1279263973236084
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,48,48,128,1,float16,float16,15,0.10677599906921387
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,48,48,128,1,float16,fp8,15,0.1279360055923462
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,48,48,128,1,float16,float16,31,0.10983359813690186
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,48,48,128,1,float16,fp8,31,0.12900639772415162
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,48,48,128,1,float16,float16,63,0.11571520566940308
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,48,48,128,1,float16,float16,127,0.11819679737091064
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,48,48,128,1,float16,fp8,63,0.13601919412612914
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,48,48,128,1,float16,fp8,127,0.14035040140151978
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,48,48,128,1,float16,float16,255,0.15972479581832885
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,48,48,128,1,float16,fp8,255,0.1828927993774414
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,48,48,128,1,float16,float16,511,0.24540159702301026
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,48,48,128,1,float16,fp8,511,0.27174720764160154
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,48,48,128,1,float16,float16,1023,0.439847993850708
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,48,48,128,1,float16,fp8,1023,0.41800642013549805
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,48,48,128,1,float16,float16,1,0.01928640007972717
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,48,48,128,1,float16,fp8,1,0.020316800475120543
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,48,48,128,1,float16,float16,15,0.019270400702953338
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,48,48,128,1,float16,float16,3,0.019551999866962433
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,48,48,128,1,float16,fp8,3,0.020311999320983886
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,48,48,128,1,float16,float16,2047,0.8160943984985352
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,48,48,128,1,float16,float16,7,0.01936960071325302
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,48,48,128,1,float16,fp8,7,0.020292800664901734
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,48,48,128,1,float16,fp8,2047,0.6903535842895507
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,48,48,128,1,float16,fp8,15,0.02025119960308075
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,48,48,128,1,float16,float16,31,0.01937279999256134
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,48,48,128,1,float16,fp8,31,0.02064639925956726
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,48,48,128,1,float16,float16,63,0.019368000328540802
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,48,48,128,1,float16,fp8,63,0.02045599967241287
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,48,48,128,1,float16,float16,127,0.019424000382423402
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,48,48,128,1,float16,fp8,127,0.020304000377655028
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,48,48,128,1,float16,float16,255,0.021062399446964263
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,48,48,128,1,float16,fp8,255,0.022294400632381438
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,48,48,128,1,float16,float16,511,0.024358400702476503
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,48,48,128,1,float16,fp8,511,0.02613919973373413
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,48,48,128,1,float16,float16,1023,0.02696320116519928
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,48,48,128,1,float16,fp8,1023,0.03210560083389282
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,48,48,128,1,float16,float16,2047,0.042884799838066104
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,48,48,128,1,float16,fp8,2047,0.039638400077819824
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,48,48,128,1,float16,float16,1,0.019595199823379518
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,48,48,128,1,float16,fp8,1,0.020819200575351714
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,48,48,128,1,float16,float16,3,0.01982560008764267
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,48,48,128,1,float16,fp8,3,0.02083040028810501
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,48,48,128,1,float16,float16,7,0.01974720060825348
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,48,48,128,1,float16,fp8,7,0.02093600034713745
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,48,48,128,1,float16,float16,15,0.020129600167274476
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,48,48,128,1,float16,fp8,15,0.020817600190639496
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,48,48,128,1,float16,float16,31,0.019708800315856933
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,48,48,128,1,float16,fp8,31,0.021004800498485566
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,48,48,128,1,float16,fp8,63,0.020615999400615693
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,48,48,128,1,float16,float16,63,0.01979680061340332
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,48,48,128,1,float16,float16,127,0.01998399943113327
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,48,48,128,1,float16,fp8,127,0.021065600216388702
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,48,48,128,1,float16,float16,255,0.02186720073223114
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,48,48,128,1,float16,fp8,255,0.02284640073776245
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,48,48,128,1,float16,float16,511,0.024609600007534028
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,48,48,128,1,float16,fp8,511,0.026681599020957947
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,48,48,128,1,float16,float16,1023,0.039822399616241455
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,48,48,128,1,float16,fp8,1023,0.03845759928226471
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,48,48,128,1,float16,float16,2047,0.055137598514556886
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,48,48,128,1,float16,fp8,2047,0.05686079859733582
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,48,48,128,1,float16,float16,1,0.06312000155448913
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,48,48,128,1,float16,fp8,1,0.07227360010147095
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,48,48,128,1,float16,float16,3,0.0626255989074707
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,48,48,128,1,float16,fp8,3,0.07198879718780518
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,48,48,128,1,float16,float16,7,0.0629647970199585
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,48,48,128,1,float16,fp8,7,0.07189279794692993
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,48,48,128,1,float16,float16,15,0.06286879777908325
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,48,48,128,1,float16,fp8,15,0.07246080040931702
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,48,48,128,1,float16,float16,31,0.06342080235481262
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,48,48,128,1,float16,fp8,31,0.07251840233802795
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,48,48,128,1,float16,float16,63,0.06775199770927429
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,48,48,128,1,float16,fp8,63,0.07227839827537537
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,48,48,128,1,float16,float16,127,0.0712112009525299
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,48,48,128,1,float16,fp8,127,0.07932320237159729
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,48,48,128,1,float16,float16,255,0.09301279783248902
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,48,48,128,1,float16,float16,1023,0.22950398921966553
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,48,48,128,1,float16,fp8,255,0.10266560316085815
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,48,48,128,1,float16,float16,511,0.13753919601440429
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,48,48,128,1,float16,fp8,511,0.14782400131225587
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,48,48,128,1,float16,fp8,1023,0.22224481105804444
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,48,48,128,1,float16,float16,1,0.023537600040435792
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,48,48,128,1,float16,float16,2047,0.41526241302490235
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,48,48,128,1,float16,fp8,1,0.025153601169586183
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,48,48,128,1,float16,float16,3,0.02375999987125397
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,48,48,128,1,float16,fp8,2047,0.36221439838409425
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,48,48,128,1,float16,fp8,3,0.02523680031299591
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,48,48,128,1,float16,float16,7,0.02351039946079254
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,48,48,128,1,float16,fp8,7,0.02518559992313385
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,48,48,128,1,float16,float16,15,0.023763200640678404
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,48,48,128,1,float16,fp8,15,0.025279998779296875
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,48,48,128,1,float16,float16,31,0.023839999735355378
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,48,48,128,1,float16,fp8,31,0.025217598676681517
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,48,48,128,1,float16,float16,63,0.023651200532913207
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,48,48,128,1,float16,fp8,63,0.02526400089263916
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,48,48,128,1,float16,float16,127,0.023907199501991272
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,48,48,128,1,float16,fp8,127,0.025367999076843263
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,48,48,128,1,float16,float16,255,0.027612799406051637
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,48,48,128,1,float16,fp8,255,0.029414400458335876
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,48,48,128,1,float16,float16,511,0.037484800815582274
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,48,48,128,1,float16,fp8,511,0.03656319975852966
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,48,48,128,1,float16,float16,1023,0.05523040294647217
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,48,48,128,1,float16,fp8,1,0.23727679252624512
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,48,48,128,1,float16,fp8,1023,0.05755199790000916
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,48,48,128,1,float16,float16,2047,0.08451840281486511
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,48,48,128,1,float16,fp8,2047,0.08325600028038024
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,48,48,128,1,float16,float16,1,0.19478240013122558
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,48,48,128,1,float16,float16,3,0.19529119729995728
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,48,48,128,1,float16,fp8,3,0.2360095977783203
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,48,48,128,1,float16,float16,7,0.1947648048400879
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,48,48,128,1,float16,fp8,7,0.23719680309295654
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,48,48,128,1,float16,float16,15,0.19503840208053588
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,48,48,128,1,float16,fp8,15,0.2359071969985962
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,48,48,128,1,float16,float16,31,0.20785279273986818
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,48,48,128,1,float16,fp8,31,0.2386928081512451
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,48,48,128,1,float16,float16,63,0.20810561180114745
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,48,48,128,1,float16,fp8,63,0.24996159076690674
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,48,48,128,1,float16,float16,127,0.21238400936126708
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,48,48,128,1,float16,fp8,127,0.25601279735565186
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,48,48,128,1,float16,float16,255,0.29658079147338867
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,48,48,128,1,float16,fp8,255,0.3344736099243164
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,48,48,128,1,float16,float16,511,0.4706863880157471
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,48,48,128,1,float16,fp8,511,0.5079823970794678
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,48,48,128,1,float16,float16,1,0.37090559005737306
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,48,48,128,1,float16,fp8,1,0.45967998504638674
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,48,48,128,1,float16,float16,3,0.3698208093643188
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,48,48,128,1,float16,float16,1023,0.8346240043640136
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,48,48,128,1,float16,fp8,1023,0.7893743991851807
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,48,48,128,1,float16,fp8,3,0.45740480422973634
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,48,48,128,1,float16,float16,7,0.3715728044509888
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,48,48,128,1,float16,fp8,7,0.4587520122528076
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,48,48,128,1,float16,fp8,31,0.5103519916534424
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,48,48,128,1,float16,float16,15,0.3825648069381714
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,48,48,128,1,float16,fp8,15,0.4588335990905762
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,48,48,128,1,float16,float16,31,0.3881472110748291
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,48,48,128,1,float16,float16,63,0.39381439685821534
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,48,48,128,1,float16,fp8,63,0.4802815914154053
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,48,48,128,1,float16,float16,127,0.40561761856079104
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,48,48,128,1,float16,fp8,127,0.49277281761169434
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,48,48,128,1,float16,float16,255,0.583345603942871
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,48,48,128,1,float16,float16,1,0.7264256000518798
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,48,48,128,1,float16,fp8,255,0.652668809890747
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,48,48,128,1,float16,fp8,1,0.8994336128234863
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,48,48,128,1,float16,float16,3,0.7323904037475586
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,48,48,128,1,float16,fp8,3,0.8951071739196778
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,48,48,128,1,float16,float16,7,0.7534031867980957
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,48,48,128,1,float16,fp8,7,0.884068775177002
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,48,48,128,1,float16,float16,15,0.7642735958099365
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,48,48,128,1,float16,fp8,15,0.9222319602966309
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,48,48,128,1,float16,float16,31,0.7573999881744384
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,48,48,128,1,float16,fp8,31,0.9447168350219727
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,48,48,128,1,float16,float16,63,0.7637519836425781
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,48,48,128,1,float16,float16,1,0.028483200073242187
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,48,48,128,1,float16,fp8,1,0.030299198627471925
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,48,48,128,1,float16,fp8,63,0.9385680198669434
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,48,48,128,1,float16,float16,3,0.02825759947299957
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,48,48,128,1,float16,float16,127,0.7948304176330566
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,48,48,128,1,float16,fp8,3,0.03038559854030609
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,48,48,128,1,float16,fp8,127,0.9645855903625489
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,48,48,128,1,float16,float16,7,0.028275200724601747
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,48,48,128,1,float16,fp8,7,0.030704000592231752
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,48,48,128,1,float16,float16,15,0.028064000606536865
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,48,48,128,1,float16,fp8,15,0.0305184006690979
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,48,48,128,1,float16,float16,31,0.028089600801467895
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,48,48,128,1,float16,fp8,31,0.03017280101776123
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,48,48,128,1,float16,float16,63,0.028091201186180116
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,48,48,128,1,float16,fp8,63,0.03043999969959259
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,48,48,128,1,float16,fp8,511,0.05298240184783935
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,48,48,128,1,float16,float16,127,0.02826400101184845
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,48,48,128,1,float16,fp8,127,0.030904000997543334
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,48,48,128,1,float16,float16,255,0.037676799297332766
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,48,48,128,1,float16,fp8,255,0.03638879954814911
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,48,48,128,1,float16,float16,511,0.04997279942035675
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,48,48,128,1,float16,float16,1023,0.07569760084152222
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,48,48,128,1,float16,fp8,1023,0.07498720288276672
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,48,48,128,1,float16,float16,2047,0.1246608018875122
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,48,48,128,1,float16,fp8,2047,0.11186879873275757
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,48,48,128,1,float16,float16,1,1.4849712371826171
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,48,48,128,1,float16,fp8,1,1.7496736526489258
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,48,48,128,1,float16,float16,3,1.4850223541259766
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,48,48,128,1,float16,float16,7,1.485801601409912
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,48,48,128,1,float16,fp8,3,1.7950544357299805
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,48,48,128,1,float16,float16,15,1.494638442993164
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,48,48,128,1,float16,fp8,7,1.8638879776000976
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,48,48,128,1,float16,fp8,15,1.8666208267211915
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,48,48,128,1,float16,float16,31,1.4758735656738282
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,48,48,128,1,float16,float16,63,1.500699234008789
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,48,48,128,1,float16,fp8,31,1.8682912826538085
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,48,48,128,1,float16,fp8,63,1.8495792388916015
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,48,48,128,1,float16,float16,1,2.938115119934082
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,48,48,128,1,float16,float16,3,2.9386959075927734
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,48,48,128,1,float16,fp8,1,3.707708740234375
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,48,48,128,1,float16,float16,1,0.03985120058059692
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,48,48,128,1,float16,float16,7,2.919059181213379
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,48,48,128,1,float16,fp8,1,0.044433599710464476
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,48,48,128,1,float16,fp8,3,3.6777488708496096
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,48,48,128,1,float16,float16,3,0.03960480093955994
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,48,48,128,1,float16,float16,15,2.934833526611328
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,48,48,128,1,float16,float16,7,0.03925600051879883
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,48,48,128,1,float16,fp8,7,0.04433279931545257
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,48,48,128,1,float16,fp8,3,0.04423199892044068
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,48,48,128,1,float16,fp8,7,3.7133201599121093
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,48,48,128,1,float16,fp8,31,0.04463520050048828
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,48,48,128,1,float16,float16,15,0.03967039883136749
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,48,48,128,1,float16,fp8,15,0.04411520063877106
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,48,48,128,1,float16,float16,31,0.03962399959564209
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,48,48,128,1,float16,float16,31,2.939803123474121
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,48,48,128,1,float16,float16,63,0.03997600078582764
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,48,48,128,1,float16,fp8,63,0.04447839856147766
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,48,48,128,1,float16,fp8,15,3.6913520812988283
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,48,48,128,1,float16,float16,127,0.04542720019817352
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,48,48,128,1,float16,fp8,127,0.044302400946617124
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,48,48,128,1,float16,float16,255,0.058601599931716916
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,48,48,128,1,float16,fp8,31,3.6813518524169924
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,48,48,128,1,float16,fp8,255,0.06290240287780761
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,48,48,128,1,float16,float16,511,0.07965919971466065
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,48,48,128,1,float16,fp8,511,0.08571839928627015
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,48,48,128,1,float16,float16,1023,0.1285215973854065
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,48,48,128,1,float16,fp8,1023,0.12418080568313598
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,48,48,128,1,float16,float16,2047,0.2205984115600586
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,48,48,128,1,float16,fp8,2047,0.19447200298309325
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,40,40,128,1,float16,float16,1,0.09270399808883667
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,40,40,128,1,float16,fp8,1,0.10965280532836914
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,40,40,128,1,float16,float16,3,0.09262239933013916
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,40,40,128,1,float16,fp8,3,0.11006400585174561
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,40,40,128,1,float16,float16,7,0.09309599995613098
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,40,40,128,1,float16,fp8,7,0.10936640501022339
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,40,40,128,1,float16,float16,15,0.0925055980682373
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,40,40,128,1,float16,fp8,15,0.10957599878311157
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,40,40,128,1,float16,float16,31,0.09312639832496643
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,40,40,128,1,float16,fp8,31,0.10965759754180908
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,40,40,128,1,float16,float16,63,0.10044640302658081
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,40,40,128,1,float16,fp8,63,0.11406559944152832
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,40,40,128,1,float16,float16,127,0.10170719623565674
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,40,40,128,1,float16,fp8,127,0.12066080570220947
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,40,40,128,1,float16,float16,255,0.13765759468078614
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,40,40,128,1,float16,fp8,255,0.1564288020133972
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,40,40,128,1,float16,float16,511,0.2108720064163208
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,40,40,128,1,float16,fp8,511,0.22905280590057372
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,40,40,128,1,float16,float16,1023,0.36719839572906493
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,40,40,128,1,float16,float16,2047,0.6746848106384278
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,40,40,128,1,float16,fp8,1023,0.35479519367218015
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,40,40,128,1,float16,float16,1,0.019153599441051484
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,40,40,128,1,float16,fp8,1,0.020124800503253937
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,40,40,128,1,float16,float16,3,0.01915999948978424
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,40,40,128,1,float16,fp8,2047,0.5806848049163819
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,40,40,128,1,float16,fp8,3,0.020252799987792967
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,40,40,128,1,float16,float16,7,0.019412800669670105
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,40,40,128,1,float16,fp8,7,0.020606400072574617
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,40,40,128,1,float16,float16,15,0.0192671999335289
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,40,40,128,1,float16,fp8,15,0.020113599300384522
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,40,40,128,1,float16,float16,31,0.019355200231075287
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,40,40,128,1,float16,fp8,31,0.020075200498104094
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,40,40,128,1,float16,float16,63,0.019368000328540802
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,40,40,128,1,float16,fp8,63,0.020260800421237946
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,40,40,128,1,float16,float16,127,0.019564799964427948
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,40,40,128,1,float16,fp8,127,0.02048799991607666
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,40,40,128,1,float16,float16,255,0.021303999423980712
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,40,40,128,1,float16,fp8,255,0.02234079986810684
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,40,40,128,1,float16,float16,511,0.02444159984588623
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,40,40,128,1,float16,fp8,511,0.026118400692939758
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,40,40,128,1,float16,float16,1023,0.02574400007724762
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,40,40,128,1,float16,fp8,1023,0.027476799488067628
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,40,40,128,1,float16,float16,2047,0.03155680000782013
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,40,40,128,1,float16,fp8,2047,0.03150720000267029
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,40,40,128,1,float16,float16,1,0.019823999702930452
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,40,40,128,1,float16,fp8,1,0.020777599513530733
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,40,40,128,1,float16,float16,3,0.019883200526237488
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,40,40,128,1,float16,fp8,3,0.020452800393104553
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,40,40,128,1,float16,float16,7,0.01955839991569519
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,40,40,128,1,float16,fp8,7,0.02051839977502823
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,40,40,128,1,float16,float16,15,0.019704000651836397
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,40,40,128,1,float16,fp8,15,0.020713600516319274
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,40,40,128,1,float16,float16,31,0.020043200254440306
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,40,40,128,1,float16,fp8,31,0.02059199959039688
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,40,40,128,1,float16,float16,63,0.01998240053653717
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,40,40,128,1,float16,fp8,63,0.020641599595546723
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,40,40,128,1,float16,float16,127,0.01988479942083359
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,40,40,128,1,float16,fp8,127,0.020497600734233856
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,40,40,128,1,float16,float16,255,0.021611200273036958
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,40,40,128,1,float16,fp8,255,0.022977599501609804
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,40,40,128,1,float16,float16,511,0.024929599463939668
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,40,40,128,1,float16,fp8,511,0.02651520073413849
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,40,40,128,1,float16,float16,1023,0.0369376003742218
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,40,40,128,1,float16,fp8,1023,0.03786399960517883
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,40,40,128,1,float16,float16,2047,0.05507040023803711
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,40,40,128,1,float16,fp8,2047,0.05119519829750061
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,40,40,128,1,float16,float16,1,0.05531200170516968
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,40,40,128,1,float16,fp8,1,0.06284160017967225
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,40,40,128,1,float16,float16,3,0.055174398422241214
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,40,40,128,1,float16,fp8,3,0.06301599740982056
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,40,40,128,1,float16,float16,7,0.05494239926338196
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,40,40,128,1,float16,fp8,7,0.06326559782028199
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,40,40,128,1,float16,float16,15,0.055174398422241214
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,40,40,128,1,float16,fp8,15,0.06350719928741455
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,40,40,128,1,float16,float16,31,0.05482879877090454
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,40,40,128,1,float16,fp8,31,0.06346399784088134
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,40,40,128,1,float16,float16,63,0.05778080224990845
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,40,40,128,1,float16,fp8,63,0.06288319826126099
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,40,40,128,1,float16,float16,127,0.0629472017288208
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,40,40,128,1,float16,fp8,127,0.06676480174064636
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,40,40,128,1,float16,float16,255,0.08228160142898559
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,40,40,128,1,float16,fp8,255,0.08887680172920227
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,40,40,128,1,float16,float16,511,0.11943680047988892
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,40,40,128,1,float16,fp8,511,0.12667839527130126
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,40,40,128,1,float16,float16,1023,0.19787520170211792
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,40,40,128,1,float16,float16,1,0.02362080067396164
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,40,40,128,1,float16,fp8,1023,0.18999840021133424
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,40,40,128,1,float16,float16,2047,0.3521343946456909
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,40,40,128,1,float16,fp8,1,0.02491360008716583
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,40,40,128,1,float16,fp8,2047,0.3058784008026123
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,40,40,128,1,float16,float16,3,0.023571200668811798
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,40,40,128,1,float16,fp8,3,0.025092801451683043
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,40,40,128,1,float16,float16,7,0.023489600419998168
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,40,40,128,1,float16,fp8,7,0.025150400400161744
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,40,40,128,1,float16,float16,15,0.02348479926586151
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,40,40,128,1,float16,fp8,15,0.025099200010299683
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,40,40,128,1,float16,float16,31,0.023443199694156647
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,40,40,128,1,float16,fp8,31,0.025047999620437623
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,40,40,128,1,float16,float16,63,0.023579199612140656
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,40,40,128,1,float16,fp8,63,0.02499680072069168
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,40,40,128,1,float16,float16,127,0.02372319996356964
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,40,40,128,1,float16,fp8,127,0.025412800908088683
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,40,40,128,1,float16,float16,255,0.027268800139427184
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,40,40,128,1,float16,fp8,255,0.029049599170684816
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,40,40,128,1,float16,float16,511,0.03634400069713593
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,40,40,128,1,float16,fp8,511,0.036771199107170104
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,40,40,128,1,float16,float16,1023,0.054065597057342527
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,40,40,128,1,float16,fp8,1023,0.056092798709869385
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,40,40,128,1,float16,float16,2047,0.08401119709014893
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,40,40,128,1,float16,fp8,2047,0.08280959725379944
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,40,40,128,1,float16,float16,1,0.1647744059562683
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,40,40,128,1,float16,fp8,1,0.19984480142593383
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,40,40,128,1,float16,float16,3,0.1641103982925415
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,40,40,128,1,float16,fp8,3,0.2008944034576416
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,40,40,128,1,float16,float16,7,0.1659168004989624
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,40,40,128,1,float16,fp8,7,0.19925919771194459
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,40,40,128,1,float16,float16,15,0.1668287992477417
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,40,40,128,1,float16,fp8,15,0.20090079307556152
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,40,40,128,1,float16,float16,31,0.17164479494094848
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,40,40,128,1,float16,fp8,31,0.20261600017547607
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,40,40,128,1,float16,float16,255,0.252455997467041
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,40,40,128,1,float16,float16,63,0.1753648042678833
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,40,40,128,1,float16,fp8,63,0.21209280490875243
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,40,40,128,1,float16,float16,127,0.18053760528564453
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,40,40,128,1,float16,fp8,127,0.21597120761871338
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,40,40,128,1,float16,fp8,255,0.2865360021591187
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,40,40,128,1,float16,float16,511,0.39008479118347167
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,40,40,128,1,float16,fp8,511,0.42549757957458495
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,40,40,128,1,float16,float16,1,0.3120575904846191
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,40,40,128,1,float16,float16,1023,0.7087376117706299
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,40,40,128,1,float16,fp8,1,0.38467519283294677
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,40,40,128,1,float16,float16,3,0.3110975980758667
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,40,40,128,1,float16,fp8,1023,0.6532512187957764
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,40,40,128,1,float16,fp8,3,0.38809919357299805
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,40,40,128,1,float16,float16,7,0.31246559619903563
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,40,40,128,1,float16,fp8,7,0.3865776062011719
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,40,40,128,1,float16,float16,15,0.31969599723815917
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,40,40,128,1,float16,fp8,15,0.3866080045700073
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,40,40,128,1,float16,float16,31,0.3292128086090088
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,40,40,128,1,float16,fp8,31,0.4042543888092041
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,40,40,128,1,float16,float16,63,0.3301248073577881
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,40,40,128,1,float16,fp8,63,0.4091519832611084
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,40,40,128,1,float16,float16,127,0.34117279052734373
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,40,40,128,1,float16,fp8,127,0.41872639656066896
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,40,40,128,1,float16,float16,255,0.48233761787414553
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,40,40,128,1,float16,fp8,255,0.5479375839233398
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,40,40,128,1,float16,float16,1,0.606499195098877
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,40,40,128,1,float16,fp8,1,0.7524079799652099
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,40,40,128,1,float16,float16,3,0.6026112079620362
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,40,40,128,1,float16,fp8,3,0.7588448047637939
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,40,40,128,1,float16,float16,7,0.6189919948577881
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,40,40,128,1,float16,float16,31,0.6383135795593262
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,40,40,128,1,float16,fp8,7,0.7577951908111572
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,40,40,128,1,float16,float16,15,0.6372735977172852
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,40,40,128,1,float16,fp8,15,0.7582064151763916
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,40,40,128,1,float16,fp8,31,0.7892752170562745
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,40,40,128,1,float16,float16,63,0.6473775863647461
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,40,40,128,1,float16,float16,1,0.027860799431800844
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,40,40,128,1,float16,fp8,63,0.7904687881469726
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,40,40,128,1,float16,fp8,3,0.030084800720214844
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,40,40,128,1,float16,fp8,1,0.0300464004278183
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,40,40,128,1,float16,float16,127,0.6632991790771484
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,40,40,128,1,float16,float16,3,0.028071999549865723
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,40,40,128,1,float16,fp8,127,0.814568042755127
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,40,40,128,1,float16,float16,7,0.027772799134254456
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,40,40,128,1,float16,fp8,7,0.029985600709915163
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,40,40,128,1,float16,float16,15,0.028046399354934692
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,40,40,128,1,float16,fp8,15,0.030113598704338072
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,40,40,128,1,float16,float16,31,0.027774399518966673
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,40,40,128,1,float16,fp8,31,0.03001439869403839
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,40,40,128,1,float16,float16,63,0.02813119888305664
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,40,40,128,1,float16,fp8,63,0.030084800720214844
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,40,40,128,1,float16,float16,127,0.02797119915485382
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,40,40,128,1,float16,fp8,127,0.0302592009305954
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,40,40,128,1,float16,float16,255,0.03524479866027832
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,40,40,128,1,float16,fp8,255,0.03638719916343689
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,40,40,128,1,float16,float16,511,0.04855040013790131
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,40,40,128,1,float16,fp8,511,0.05037760138511658
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,40,40,128,1,float16,float16,1023,0.07264000177383423
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,40,40,128,1,float16,fp8,1023,0.07480159997940064
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,40,40,128,1,float16,float16,2047,0.12085280418395997
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,40,40,128,1,float16,fp8,2047,0.11172000169754029
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,40,40,128,1,float16,float16,1,1.243449592590332
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,40,40,128,1,float16,fp8,1,1.4309215545654297
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,40,40,128,1,float16,float16,3,1.2409071922302246
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,40,40,128,1,float16,fp8,3,1.4638879776000977
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,40,40,128,1,float16,float16,7,1.2398544311523438
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,40,40,128,1,float16,float16,15,1.245089626312256
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,40,40,128,1,float16,fp8,7,1.495582389831543
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,40,40,128,1,float16,fp8,15,1.556340789794922
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,40,40,128,1,float16,float16,31,1.2352352142333984
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,40,40,128,1,float16,fp8,31,1.5574159622192383
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,40,40,128,1,float16,float16,63,1.2563615798950196
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,40,40,128,1,float16,fp8,63,1.5388480186462403
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,40,40,128,1,float16,float16,1,2.4555055618286135
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,40,40,128,1,float16,float16,3,2.4509023666381835
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,40,40,128,1,float16,fp8,1,3.0963472366333007
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,40,40,128,1,float16,float16,7,2.450254440307617
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,40,40,128,1,float16,float16,1,0.03604480028152466
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,40,40,128,1,float16,fp8,3,3.066132736206055
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,40,40,128,1,float16,fp8,1,0.03975839912891388
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,40,40,128,1,float16,float16,15,2.4507728576660157
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,40,40,128,1,float16,float16,3,0.03623520135879517
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,40,40,128,1,float16,fp8,7,3.0939071655273436
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,40,40,128,1,float16,fp8,3,0.03973279893398285
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,40,40,128,1,float16,float16,7,0.03627839982509613
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,40,40,128,1,float16,fp8,15,3.0816671371459963
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,40,40,128,1,float16,fp8,7,0.03987039923667908
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,40,40,128,1,float16,float16,15,0.03631199896335602
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,40,40,128,1,float16,float16,31,2.4585567474365235
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,40,40,128,1,float16,fp8,15,0.039929598569869995
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,40,40,128,1,float16,float16,31,0.03608160018920899
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,40,40,128,1,float16,fp8,31,0.03983519971370697
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,40,40,128,1,float16,float16,63,0.03617439866065979
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,40,40,128,1,float16,fp8,31,3.0644880294799806
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,40,40,128,1,float16,float16,511,0.06959360241889953
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,40,40,128,1,float16,fp8,63,0.03971840143203735
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,40,40,128,1,float16,float16,127,0.03863039910793305
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,40,40,128,1,float16,fp8,127,0.040068799257278444
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,40,40,128,1,float16,float16,255,0.05237119793891907
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,40,40,128,1,float16,fp8,255,0.05272960066795349
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,40,40,128,1,float16,fp8,511,0.07414240241050721
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,40,40,128,1,float16,float16,1023,0.11283680200576782
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,40,40,128,1,float16,fp8,1023,0.10875840187072754
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,40,40,128,1,float16,float16,2047,0.18624800443649292
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,32,128,1,float16,float16,1,0.07817919850349427
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,40,40,128,1,float16,fp8,2047,0.16893279552459717
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,32,128,1,float16,fp8,1,0.09139999747276306
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,32,128,1,float16,float16,3,0.07763360142707824
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,32,128,1,float16,fp8,3,0.0909056007862091
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,32,128,1,float16,fp8,31,0.09180960059165955
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,32,128,1,float16,float16,7,0.0780463993549347
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,32,128,1,float16,fp8,7,0.0913263976573944
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,32,128,1,float16,float16,15,0.07813599705696106
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,32,128,1,float16,fp8,15,0.09118559956550598
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,32,128,1,float16,float16,31,0.07880480289459228
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,32,128,1,float16,float16,63,0.08399839997291565
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,32,128,1,float16,fp8,63,0.09135199785232544
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,32,128,1,float16,float16,127,0.08714560270309449
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,32,128,1,float16,fp8,127,0.10114560127258301
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,32,128,1,float16,float16,255,0.11568959951400756
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,32,128,1,float16,fp8,255,0.12985440492630004
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,32,128,1,float16,float16,511,0.1743407964706421
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,32,128,1,float16,fp8,511,0.1901263952255249
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,32,128,1,float16,float16,1023,0.2964688062667847
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,32,128,1,float16,fp8,1023,0.28843839168548585
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,32,128,1,float16,float16,2047,0.5450575828552247
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,32,128,1,float16,float16,1,0.019232000410556793
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,32,128,1,float16,fp8,2047,0.4735743999481201
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,32,128,1,float16,fp8,1,0.020351999998092653
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,32,128,1,float16,float16,3,0.019256000220775605
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,32,128,1,float16,fp8,3,0.020193600654602052
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,32,128,1,float16,float16,7,0.019256000220775605
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,32,128,1,float16,float16,4095,1.061134433746338
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,32,128,1,float16,fp8,4095,0.8707200050354004
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,32,128,1,float16,fp8,7,0.020319999754428865
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,32,128,1,float16,float16,15,0.019620800018310548
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,32,128,1,float16,fp8,15,0.020252799987792967
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,32,128,1,float16,float16,31,0.019276799261569978
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,32,128,1,float16,fp8,31,0.020017600059509276
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,32,128,1,float16,float16,63,0.019252799451351166
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,32,128,1,float16,fp8,63,0.020180800557136537
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,32,128,1,float16,fp8,127,0.02075680047273636
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,32,128,1,float16,float16,127,0.01934880018234253
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,32,128,1,float16,float16,255,0.02070239931344986
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,32,128,1,float16,fp8,255,0.02194879949092865
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,32,128,1,float16,float16,511,0.02396800071001053
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,32,128,1,float16,fp8,511,0.025652799010276794
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,32,128,1,float16,float16,1023,0.025167998671531678
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,32,128,1,float16,fp8,1023,0.02547999918460846
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,32,128,1,float16,float16,2047,0.027340799570083618
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,32,128,1,float16,fp8,2047,0.02908959984779358
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,32,128,1,float16,float16,4095,0.04002400040626526
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,32,128,1,float16,fp8,4095,0.0353408008813858
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,32,128,1,float16,float16,1,0.019891199469566346
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,32,128,1,float16,fp8,1,0.02057439982891083
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,32,128,1,float16,float16,3,0.019819200038909912
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,32,128,1,float16,fp8,3,0.02078080028295517
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,32,128,1,float16,float16,7,0.01964640021324158
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,32,128,1,float16,fp8,7,0.020824000239372253
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,32,128,1,float16,float16,127,0.02030239999294281
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,32,128,1,float16,float16,15,0.019577600061893463
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,32,128,1,float16,fp8,15,0.020763200521469117
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,32,128,1,float16,float16,31,0.019849599897861482
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,32,128,1,float16,fp8,31,0.020665599405765532
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,32,128,1,float16,float16,63,0.019785599410533906
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,32,128,1,float16,fp8,63,0.020508800446987153
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,32,128,1,float16,fp8,127,0.020476800203323365
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,32,128,1,float16,float16,255,0.02173440009355545
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,32,128,1,float16,fp8,255,0.022728000581264497
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,32,128,1,float16,float16,511,0.024644799530506134
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,32,128,1,float16,fp8,511,0.026598399877548216
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,32,128,1,float16,float16,1023,0.027603200078010558
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,32,128,1,float16,fp8,1023,0.029558399319648744
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,32,128,1,float16,float16,2047,0.040161600708961485
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,32,128,1,float16,fp8,2047,0.03540000021457672
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,32,128,1,float16,float16,4095,0.05620319843292236
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,32,128,1,float16,fp8,4095,0.054553598165512085
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,32,128,1,float16,float16,1,0.0474368005990982
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,32,128,1,float16,fp8,1,0.05405920147895813
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,32,128,1,float16,float16,3,0.04764159917831421
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,32,128,1,float16,fp8,3,0.05378239750862122
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,32,128,1,float16,float16,7,0.04760479927062988
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,32,128,1,float16,fp8,7,0.05364959836006165
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,32,128,1,float16,float16,15,0.04784800112247467
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,32,128,1,float16,fp8,15,0.0536575973033905
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,32,128,1,float16,float16,31,0.04757600128650665
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,32,128,1,float16,fp8,31,0.05396639704704285
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,32,128,1,float16,float16,63,0.047751998901367186
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,32,128,1,float16,fp8,63,0.05382080078125
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,32,128,1,float16,float16,127,0.054686397314071655
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,32,128,1,float16,fp8,127,0.05469920039176941
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,32,128,1,float16,float16,255,0.07024000287055969
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,32,128,1,float16,fp8,255,0.0752672016620636
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,32,128,1,float16,float16,511,0.09924960136413574
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,32,128,1,float16,fp8,511,0.10566400289535523
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,32,128,1,float16,float16,1023,0.16447679996490477
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,32,128,1,float16,fp8,1023,0.15727360248565675
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,32,128,1,float16,float16,2047,0.28782880306243896
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,32,128,1,float16,fp8,2047,0.2489248037338257
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,32,128,1,float16,float16,1,0.01988479942083359
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,32,128,1,float16,fp8,1,0.02101760059595108
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,32,128,1,float16,float16,4095,0.5404767990112305
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,32,128,1,float16,float16,3,0.02003840059041977
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,32,128,1,float16,fp8,4095,0.45232157707214354
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,32,128,1,float16,fp8,3,0.020931200683116914
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,32,128,1,float16,float16,7,0.020175999402999877
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,32,128,1,float16,fp8,7,0.020972800254821778
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,32,128,1,float16,float16,15,0.019940799474716185
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,32,128,1,float16,fp8,15,0.02088160067796707
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,32,128,1,float16,float16,31,0.02014880031347275
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,32,128,1,float16,fp8,31,0.020948800444602966
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,32,128,1,float16,float16,63,0.02038239985704422
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,32,128,1,float16,fp8,63,0.021211199462413788
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,32,128,1,float16,float16,127,0.02024639993906021
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,32,128,1,float16,fp8,127,0.021191999316215515
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,32,128,1,float16,float16,255,0.022096000611782074
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,32,128,1,float16,fp8,255,0.02326720058917999
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,32,128,1,float16,float16,511,0.025383999943733214
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,32,128,1,float16,float16,4095,0.08719040155410766
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,32,128,1,float16,fp8,511,0.027169600129127502
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,32,128,1,float16,float16,1023,0.03940800130367279
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,32,128,1,float16,float16,1,0.13670079708099364
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,32,128,1,float16,float16,3,0.13656799793243407
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,32,128,1,float16,fp8,1023,0.0351936012506485
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,32,128,1,float16,float16,2047,0.05533599853515625
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,32,128,1,float16,fp8,2047,0.05383840203285217
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,32,128,1,float16,fp8,4095,0.07986720204353333
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,32,128,1,float16,fp8,1,0.16500799655914306
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,32,128,1,float16,fp8,3,0.1651919960975647
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,32,128,1,float16,float16,7,0.13666880130767822
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,32,128,1,float16,fp8,7,0.16548320055007934
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,32,128,1,float16,float16,15,0.13685280084609985
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,32,128,1,float16,fp8,15,0.16546239852905273
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,32,128,1,float16,float16,31,0.13950400352478026
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,32,128,1,float16,fp8,31,0.16546720266342163
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,32,128,1,float16,float16,63,0.14586399793624877
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,32,128,1,float16,fp8,63,0.1755247950553894
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,32,128,1,float16,float16,127,0.14983359575271607
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,32,128,1,float16,fp8,127,0.18035839796066283
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,32,128,1,float16,float16,255,0.2071615934371948
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,32,128,1,float16,fp8,255,0.23139679431915283
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,32,128,1,float16,float16,511,0.31370561122894286
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,32,128,1,float16,fp8,511,0.35093600749969484
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,32,128,1,float16,float16,1023,0.5759151935577392
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,32,128,1,float16,float16,1,0.25182878971099854
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,32,128,1,float16,fp8,1023,0.5476111888885498
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,32,128,1,float16,fp8,1,0.31224000453948975
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,32,128,1,float16,float16,3,0.25339200496673586
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,32,128,1,float16,float16,7,0.25208320617675783
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,32,128,1,float16,float16,2047,1.0562928199768067
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,32,128,1,float16,fp8,3,0.3132256031036377
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,32,128,1,float16,fp8,7,0.3127648115158081
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,32,128,1,float16,float16,15,0.2560496091842651
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,32,128,1,float16,fp8,2047,0.9197567939758301
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,32,128,1,float16,fp8,15,0.3112191915512085
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,32,128,1,float16,fp8,63,0.3319808006286621
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,32,128,1,float16,float16,31,0.26571199893951414
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,32,128,1,float16,fp8,31,0.34054079055786135
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,32,128,1,float16,float16,63,0.2726703882217407
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,32,128,1,float16,float16,127,0.27983999252319336
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,32,128,1,float16,fp8,127,0.3424511909484863
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,32,128,1,float16,float16,255,0.39219520092010496
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,32,128,1,float16,fp8,255,0.4430975914001465
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,32,128,1,float16,float16,1,0.4915679931640625
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,32,128,1,float16,fp8,1,0.6046895980834961
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,32,128,1,float16,fp8,511,0.6692111968994141
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,32,128,1,float16,float16,511,0.6191487789154053
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,32,128,1,float16,float16,3,0.49016318321228025
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,32,128,1,float16,fp8,3,0.6035280227661133
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,32,128,1,float16,float16,7,0.4932096004486084
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,32,128,1,float16,float16,15,0.5123663902282715
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,32,128,1,float16,fp8,7,0.6065375804901123
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,32,128,1,float16,fp8,15,0.6016464233398438
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,32,128,1,float16,float16,31,0.5112800121307373
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,32,128,1,float16,fp8,63,0.6341055870056153
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,32,128,1,float16,fp8,31,0.6411136150360107
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,32,128,1,float16,float16,63,0.5223360061645508
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,32,128,1,float16,float16,127,0.5325520038604736
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,32,128,1,float16,fp8,127,0.651638412475586
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,32,128,1,float16,float16,1,0.02401919960975647
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,32,128,1,float16,fp8,1,0.025624001026153566
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,32,128,1,float16,float16,255,0.7602767944335938
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,32,128,1,float16,float16,3,0.02406879961490631
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,32,128,1,float16,fp8,3,0.02561599910259247
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,32,128,1,float16,fp8,255,0.8568240165710449
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,32,128,1,float16,float16,7,0.02419999986886978
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,32,128,1,float16,fp8,7,0.025766399502754212
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,32,128,1,float16,float16,15,0.024260799586772918
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,32,128,1,float16,fp8,15,0.02563999891281128
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,32,128,1,float16,float16,31,0.02423200011253357
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,32,128,1,float16,fp8,31,0.02563680112361908
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,32,128,1,float16,float16,63,0.02390879988670349
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,32,128,1,float16,fp8,63,0.025835201144218445
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,32,128,1,float16,float16,127,0.02421119958162308
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,32,128,1,float16,fp8,127,0.026233598589897156
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,32,128,1,float16,float16,255,0.027977600693702698
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,32,128,1,float16,fp8,255,0.029931199550628663
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,32,128,1,float16,float16,511,0.04005599915981293
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,32,128,1,float16,fp8,511,0.03729279935359955
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,32,128,1,float16,float16,1023,0.05726079940795899
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,32,128,1,float16,fp8,1023,0.058113598823547365
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,32,128,1,float16,float16,2047,0.08943359851837158
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,32,128,1,float16,fp8,2047,0.08267679810523987
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,32,128,1,float16,float16,4095,0.1524656057357788
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,32,128,1,float16,fp8,4095,0.13409600257873536
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,32,128,1,float16,float16,1,0.9934271812438965
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,32,128,1,float16,fp8,1,1.1700400352478026
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,32,128,1,float16,float16,3,0.9943087577819825
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,32,128,1,float16,fp8,3,1.1641599655151367
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,32,128,1,float16,fp8,7,1.1676912307739258
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,32,128,1,float16,float16,7,0.9976160049438476
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,32,128,1,float16,float16,15,0.990932846069336
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,32,128,1,float16,fp8,15,1.2494704246520996
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,32,128,1,float16,float16,31,1.0052160263061523
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,32,128,1,float16,fp8,31,1.2478511810302735
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,32,128,1,float16,float16,63,1.0133456230163573
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,32,128,1,float16,fp8,63,1.239292812347412
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,32,128,1,float16,float16,127,1.038212776184082
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,32,128,1,float16,fp8,127,1.2653167724609375
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,32,128,1,float16,float16,1,1.9484111785888671
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,32,128,1,float16,float16,3,1.9650352478027344
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,32,128,1,float16,fp8,1,2.4630111694335937
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,32,128,1,float16,float16,7,1.9648815155029298
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,32,128,1,float16,fp8,3,2.4795391082763674
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,32,128,1,float16,float16,15,1.966414451599121
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,32,128,1,float16,fp8,7,2.4671648025512694
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,32,128,1,float16,float16,1,0.032158398628234865
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,32,128,1,float16,fp8,1,0.03503040075302124
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,32,128,1,float16,float16,3,0.03198879957199097
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,32,128,1,float16,float16,31,1.9585920333862306
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,32,128,1,float16,fp8,15,2.4573951721191407
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,32,128,1,float16,fp8,3,0.03497759997844696
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,32,128,1,float16,float16,7,0.03220320045948029
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,32,128,1,float16,fp8,7,0.035252800583839415
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,32,128,1,float16,float16,63,1.9793392181396485
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,32,128,1,float16,fp8,31,2.4488672256469726
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,32,128,1,float16,float16,15,0.032278400659561154
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,32,128,1,float16,fp8,63,2.42872314453125
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,32,128,1,float16,fp8,15,0.035011199116706845
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,32,128,1,float16,float16,31,0.0322847992181778
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,32,128,1,float16,fp8,31,0.03510720133781433
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,32,128,1,float16,float16,63,0.03252640068531036
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,32,128,1,float16,fp8,63,0.035364800691604616
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,32,128,1,float16,float16,127,0.032416000962257385
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,32,128,1,float16,fp8,127,0.03543039858341217
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,32,128,1,float16,float16,255,0.0456928014755249
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,32,128,1,float16,fp8,255,0.04333600103855133
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,32,128,1,float16,float16,511,0.06002399921417236
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,32,128,1,float16,fp8,511,0.06409119963645935
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,32,128,1,float16,float16,1023,0.0952672004699707
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,32,128,1,float16,fp8,1023,0.09119679927825927
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,32,128,1,float16,float16,2047,0.15459680557250977
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,24,24,128,1,float16,fp8,1,0.07264320254325866
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,32,128,1,float16,fp8,2047,0.13903360366821288
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,24,24,128,1,float16,float16,1,0.06305599808692933
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,32,128,1,float16,float16,4095,0.2793263912200928
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,32,128,1,float16,fp8,4095,0.23997120857238768
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,24,24,128,1,float16,float16,3,0.06321759819984436
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,24,24,128,1,float16,fp8,3,0.07239360213279725
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,24,24,128,1,float16,float16,7,0.06302239894866943
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,24,24,128,1,float16,fp8,7,0.0727728009223938
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,24,24,128,1,float16,float16,15,0.0629584014415741
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,24,24,128,1,float16,fp8,15,0.072680002450943
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,24,24,128,1,float16,float16,31,0.06333600282669068
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,24,24,128,1,float16,fp8,31,0.0723360002040863
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,24,24,128,1,float16,float16,63,0.06858400106430054
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,24,24,128,1,float16,fp8,63,0.07282400131225586
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,24,24,128,1,float16,float16,127,0.07164480090141297
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,24,24,128,1,float16,fp8,127,0.08038399815559387
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,24,24,128,1,float16,float16,255,0.09404799938201905
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,24,24,128,1,float16,fp8,255,0.10308480262756348
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,24,24,128,1,float16,float16,511,0.13846559524536134
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,24,24,128,1,float16,fp8,511,0.14658080339431762
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,24,24,128,1,float16,float16,1023,0.2326064109802246
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,24,24,128,1,float16,fp8,1023,0.22330079078674317
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,24,24,128,1,float16,float16,2047,0.41478400230407714
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,24,24,128,1,float16,fp8,2047,0.3636320114135742
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,24,24,128,1,float16,float16,1,0.019368000328540802
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,24,24,128,1,float16,fp8,1,0.02019200026988983
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,24,24,128,1,float16,float16,3,0.019307200610637665
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,24,24,128,1,float16,fp8,3,0.020151999592781068
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,24,24,128,1,float16,float16,4095,0.7876815795898438
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,24,24,128,1,float16,fp8,4095,0.6632959842681885
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,24,24,128,1,float16,float16,7,0.01921280026435852
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,24,24,128,1,float16,fp8,7,0.020158399641513825
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,24,24,128,1,float16,float16,15,0.0189968004822731
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,24,24,128,1,float16,fp8,15,0.020193600654602052
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,24,24,128,1,float16,float16,31,0.01924159973859787
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,24,24,128,1,float16,fp8,31,0.02011999934911728
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,24,24,128,1,float16,float16,63,0.019596800208091736
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,24,24,128,1,float16,fp8,63,0.019870400428771973
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,24,24,128,1,float16,float16,127,0.019392000138759614
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,24,24,128,1,float16,fp8,127,0.0201664000749588
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,24,24,128,1,float16,float16,255,0.020953600108623505
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,24,24,128,1,float16,fp8,255,0.021879999339580535
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,24,24,128,1,float16,float16,511,0.02409600019454956
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,24,24,128,1,float16,fp8,511,0.025707200169563293
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,24,24,128,1,float16,float16,1023,0.023636800050735474
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,24,24,128,1,float16,fp8,1023,0.025043201446533204
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,24,24,128,1,float16,float16,2047,0.026894399523735048
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,24,24,128,1,float16,fp8,2047,0.02730720043182373
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,24,24,128,1,float16,float16,4095,0.035836800932884216
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,24,24,128,1,float16,fp8,4095,0.0332751989364624
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,24,24,128,1,float16,float16,1,0.01947840005159378
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,24,24,128,1,float16,fp8,1,0.02019519954919815
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,24,24,128,1,float16,float16,3,0.019526399672031403
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,24,24,128,1,float16,fp8,3,0.02070080041885376
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,24,24,128,1,float16,float16,7,0.01942880004644394
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,24,24,128,1,float16,fp8,7,0.020670400559902193
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,24,24,128,1,float16,float16,15,0.019870400428771973
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,24,24,128,1,float16,fp8,15,0.020457600057125092
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,24,24,128,1,float16,float16,31,0.019449600577354433
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,24,24,128,1,float16,fp8,31,0.020241600275039674
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,24,24,128,1,float16,float16,63,0.01983039975166321
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,24,24,128,1,float16,fp8,63,0.0205935999751091
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,24,24,128,1,float16,float16,127,0.019867199659347533
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,24,24,128,1,float16,fp8,127,0.020496000349521638
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,24,24,128,1,float16,float16,255,0.02162880003452301
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,24,24,128,1,float16,fp8,255,0.02245440036058426
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,24,24,128,1,float16,float16,511,0.02444480061531067
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,24,24,128,1,float16,fp8,511,0.02608320116996765
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,24,24,128,1,float16,float16,1023,0.02731040120124817
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,24,24,128,1,float16,fp8,1023,0.03200959861278534
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,24,24,128,1,float16,float16,2047,0.042735999822616576
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,24,24,128,1,float16,fp8,2047,0.03981600105762482
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,24,24,128,1,float16,float16,4095,0.06320639848709106
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,24,24,128,1,float16,fp8,4095,0.0638480007648468
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,24,24,128,1,float16,float16,1,0.040064001083374025
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,24,24,128,1,float16,fp8,1,0.04450559914112091
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,24,24,128,1,float16,float16,3,0.039959999918937686
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,24,24,128,1,float16,fp8,3,0.04451520144939423
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,24,24,128,1,float16,float16,7,0.03987999856472015
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,24,24,128,1,float16,fp8,7,0.0446943998336792
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,24,24,128,1,float16,float16,15,0.04023680090904236
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,24,24,128,1,float16,fp8,15,0.044582399725914004
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,24,24,128,1,float16,float16,31,0.03988640010356903
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,24,24,128,1,float16,fp8,31,0.04468159973621368
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,24,24,128,1,float16,float16,63,0.04016959965229035
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,24,24,128,1,float16,fp8,63,0.04472480118274689
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,24,24,128,1,float16,float16,127,0.045556798577308655
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,24,24,128,1,float16,fp8,127,0.044947201013565065
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,24,24,128,1,float16,float16,255,0.05836319923400879
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,24,24,128,1,float16,fp8,255,0.06178879737854004
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,24,24,128,1,float16,float16,511,0.07987840175628662
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,24,24,128,1,float16,fp8,511,0.08496320247650146
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,24,24,128,1,float16,float16,1023,0.1283951997756958
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,24,24,128,1,float16,fp8,1023,0.12535840272903442
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,24,24,128,1,float16,float16,2047,0.2213200092315674
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,24,24,128,1,float16,fp8,2047,0.19692000150680541
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,24,24,128,1,float16,float16,1,0.019926400482654573
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,24,24,128,1,float16,float16,4095,0.4063839912414551
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,24,24,128,1,float16,fp8,1,0.02098879963159561
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,24,24,128,1,float16,fp8,4095,0.3454319953918457
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,24,24,128,1,float16,float16,3,0.019729599356651306
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,24,24,128,1,float16,fp8,3,0.02070080041885376
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,24,24,128,1,float16,float16,7,0.01976799964904785
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,24,24,128,1,float16,fp8,7,0.020623999834060668
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,24,24,128,1,float16,float16,15,0.020105600357055664
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,24,24,128,1,float16,fp8,15,0.020982399582862854
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,24,24,128,1,float16,float16,31,0.01988160014152527
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,24,24,128,1,float16,fp8,31,0.02088160067796707
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,24,24,128,1,float16,float16,63,0.01982080042362213
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,24,24,128,1,float16,fp8,63,0.020846399664878845
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,24,24,128,1,float16,float16,127,0.020233599841594695
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,24,24,128,1,float16,fp8,127,0.020828799903392793
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,24,24,128,1,float16,float16,255,0.021807999908924104
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,24,24,128,1,float16,fp8,255,0.02305919975042343
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,24,24,128,1,float16,float16,511,0.025092801451683043
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,24,24,128,1,float16,fp8,511,0.02685759961605072
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,24,24,128,1,float16,float16,1023,0.04020160138607025
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,24,24,128,1,float16,fp8,1023,0.03814719915390015
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,24,24,128,1,float16,float16,2047,0.055327999591827395
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,24,24,128,1,float16,fp8,2047,0.056246399879455566
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,24,24,128,1,float16,float16,4095,0.08548960089683533
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,24,24,128,1,float16,fp8,4095,0.08240479826927186
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,24,24,128,1,float16,float16,1,0.10769599676132202
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,24,24,128,1,float16,fp8,1,0.12871040105819703
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,24,24,128,1,float16,float16,3,0.10760639905929566
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,24,24,128,1,float16,fp8,3,0.1291424036026001
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,24,24,128,1,float16,float16,7,0.10705599784851075
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,24,24,128,1,float16,fp8,7,0.12930879592895508
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,24,24,128,1,float16,float16,15,0.10788639783859252
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,24,24,128,1,float16,fp8,15,0.12824640274047852
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,24,24,128,1,float16,float16,31,0.10991519689559937
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,24,24,128,1,float16,fp8,31,0.12910239696502684
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,24,24,128,1,float16,float16,63,0.11636960506439209
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,24,24,128,1,float16,fp8,63,0.13755199909210206
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,24,24,128,1,float16,float16,127,0.11847519874572754
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,24,24,128,1,float16,fp8,127,0.14230719804763795
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,24,24,128,1,float16,float16,255,0.16173280477523805
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,24,24,128,1,float16,fp8,255,0.18084479570388795
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,24,24,128,1,float16,float16,511,0.24792799949645997
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,24,24,128,1,float16,fp8,511,0.27222719192504885
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,24,24,128,1,float16,float16,1023,0.43767361640930175
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,24,24,128,1,float16,fp8,1023,0.4166975975036621
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,24,24,128,1,float16,float16,1,0.19569599628448486
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,24,24,128,1,float16,fp8,1,0.2375999927520752
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,24,24,128,1,float16,float16,2047,0.8111984252929687
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,24,24,128,1,float16,float16,3,0.19589439630508423
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,24,24,128,1,float16,fp8,3,0.23868799209594727
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,24,24,128,1,float16,fp8,2047,0.697763204574585
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,24,24,128,1,float16,float16,7,0.1962208032608032
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,24,24,128,1,float16,fp8,7,0.23753600120544432
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,24,24,128,1,float16,float16,15,0.19681119918823242
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,24,24,128,1,float16,fp8,15,0.2373055934906006
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,24,24,128,1,float16,float16,31,0.23086400032043458
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,24,24,128,1,float16,fp8,31,0.24135999679565429
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,24,24,128,1,float16,float16,63,0.20836479663848878
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,24,24,128,1,float16,fp8,63,0.25331840515136717
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,24,24,128,1,float16,float16,127,0.2136336088180542
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,24,24,128,1,float16,fp8,127,0.2596944093704224
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,24,24,128,1,float16,float16,255,0.2973599910736084
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,24,24,128,1,float16,fp8,511,0.5123040199279785
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,24,24,128,1,float16,fp8,1,0.45464320182800294
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,24,24,128,1,float16,fp8,255,0.333950400352478
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,24,24,128,1,float16,fp8,3,0.45459837913513185
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,24,24,128,1,float16,float16,511,0.467244815826416
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,24,24,128,1,float16,float16,1,0.373417592048645
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,24,24,128,1,float16,float16,3,0.373089599609375
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,24,24,128,1,float16,float16,7,0.37389121055603025
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,24,24,128,1,float16,fp8,7,0.45751519203186036
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,24,24,128,1,float16,float16,15,0.3869312047958374
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,24,24,128,1,float16,fp8,15,0.45457282066345217
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,24,24,128,1,float16,float16,31,0.3911855936050415
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,24,24,128,1,float16,fp8,31,0.4877935886383057
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,24,24,128,1,float16,float16,63,0.39761919975280763
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,24,24,128,1,float16,fp8,63,0.4875216007232666
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,24,24,128,1,float16,float16,127,0.40861759185791013
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,24,24,128,1,float16,fp8,127,0.4970096111297607
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,24,24,128,1,float16,float16,1,0.023824000358581544
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,24,24,128,1,float16,fp8,1,0.025646400451660157
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,24,24,128,1,float16,float16,255,0.5763423919677735
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,24,24,128,1,float16,float16,3,0.024025599658489227
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,24,24,128,1,float16,fp8,255,0.6442431926727294
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,24,24,128,1,float16,fp8,3,0.025323200225830077
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,24,24,128,1,float16,float16,7,0.023849600553512575
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,24,24,128,1,float16,float16,63,0.023785600066185
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,24,24,128,1,float16,fp8,7,0.02535040080547333
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,24,24,128,1,float16,float16,15,0.02388000041246414
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,24,24,128,1,float16,fp8,15,0.025620800256729127
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,24,24,128,1,float16,float16,31,0.023668800294399262
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,24,24,128,1,float16,fp8,31,0.025150400400161744
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,24,24,128,1,float16,fp8,63,0.025348800420761108
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,24,24,128,1,float16,float16,127,0.023868800699710847
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,24,24,128,1,float16,fp8,127,0.025459200143814087
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,24,24,128,1,float16,float16,255,0.02766079902648926
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,24,24,128,1,float16,fp8,255,0.029523199796676634
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,24,24,128,1,float16,float16,511,0.037360000610351565
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,24,24,128,1,float16,fp8,511,0.03692159950733185
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,24,24,128,1,float16,float16,1023,0.054927998781204225
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,24,24,128,1,float16,fp8,1023,0.05750880241394043
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,24,24,128,1,float16,float16,2047,0.08538240194320679
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,24,24,128,1,float16,fp8,2047,0.08319680094718933
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,24,24,128,1,float16,float16,4095,0.14332480430603028
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,24,24,128,1,float16,fp8,4095,0.13603999614715576
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,24,24,128,1,float16,float16,1,0.7297696113586426
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,24,24,128,1,float16,float16,3,0.7352287769317627
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,24,24,128,1,float16,fp8,1,0.8696800231933594
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,24,24,128,1,float16,fp8,3,0.8754207611083984
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,24,24,128,1,float16,float16,7,0.7456768035888672
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,24,24,128,1,float16,fp8,7,0.8769824028015136
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,24,24,128,1,float16,float16,15,0.7560031890869141
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,24,24,128,1,float16,fp8,15,0.9140463829040527
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,24,24,128,1,float16,float16,63,0.7620384216308593
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,24,24,128,1,float16,float16,31,0.7589856147766113
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,24,24,128,1,float16,fp8,31,0.9398863792419434
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,24,24,128,1,float16,fp8,63,0.9375103950500489
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,24,24,128,1,float16,float16,127,0.784823989868164
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,24,24,128,1,float16,fp8,127,0.9548751831054687
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,24,24,128,1,float16,float16,1,1.480241584777832
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,24,24,128,1,float16,float16,3,1.467300796508789
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,24,24,128,1,float16,fp8,1,1.7652847290039062
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,24,24,128,1,float16,float16,7,1.4811696052551269
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,24,24,128,1,float16,fp8,3,1.7800783157348632
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,24,24,128,1,float16,fp8,7,1.829889678955078
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,24,24,128,1,float16,float16,1,0.02826879918575287
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,24,24,128,1,float16,float16,15,1.4716303825378418
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,24,24,128,1,float16,fp8,1,0.03025279939174652
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,24,24,128,1,float16,float16,31,1.4882287979125977
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,24,24,128,1,float16,fp8,3,0.0303631991147995
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,24,24,128,1,float16,fp8,15,1.8494623184204102
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,24,24,128,1,float16,float16,3,0.02810240089893341
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,24,24,128,1,float16,float16,63,1.502030372619629
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,24,24,128,1,float16,fp8,31,1.811724853515625
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,24,24,128,1,float16,float16,7,0.027827200293540955
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,24,24,128,1,float16,fp8,7,0.030532801151275636
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,24,24,128,1,float16,float16,15,0.02810719907283783
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,24,24,128,1,float16,fp8,15,0.030211201310157774
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,24,24,128,1,float16,fp8,63,1.833580780029297
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,24,24,128,1,float16,float16,31,0.028044798970222475
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,24,24,128,1,float16,fp8,31,0.03047040104866028
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,24,24,128,1,float16,float16,63,0.02808319926261902
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,24,24,128,1,float16,fp8,63,0.030423998832702637
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,24,24,128,1,float16,float16,127,0.028644800186157227
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,24,24,128,1,float16,fp8,127,0.030486398935317995
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,24,24,128,1,float16,float16,255,0.03803679943084717
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,24,24,128,1,float16,fp8,255,0.036776000261306764
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,24,24,128,1,float16,float16,511,0.049588799476623535
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,24,24,128,1,float16,fp8,511,0.0534991979598999
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,24,24,128,1,float16,float16,1023,0.07596480250358581
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,24,24,128,1,float16,fp8,1023,0.07486879825592041
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,24,24,128,1,float16,float16,2047,0.12195680141448975
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,24,24,128,1,float16,fp8,2047,0.1114351987838745
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,24,24,128,1,float16,float16,4095,0.21365280151367189
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,24,24,128,1,float16,fp8,4095,0.18936959505081177
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,16,128,1,float16,float16,1,0.04803040027618408
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,16,128,1,float16,fp8,1,0.05425919890403748
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,16,128,1,float16,float16,3,0.047654399275779726
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,16,128,1,float16,fp8,3,0.05432159900665283
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,16,128,1,float16,float16,7,0.04796159863471985
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,16,128,1,float16,fp8,7,0.05432159900665283
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,16,128,1,float16,float16,15,0.04768959879875183
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,16,128,1,float16,fp8,15,0.05462080240249634
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,16,128,1,float16,float16,31,0.04806720018386841
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,16,128,1,float16,fp8,31,0.05459520220756531
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,16,128,1,float16,float16,63,0.04791359901428223
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,16,128,1,float16,fp8,63,0.05429919958114624
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,16,128,1,float16,fp8,511,0.10651359558105469
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,16,128,1,float16,float16,127,0.05537279844284058
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,16,128,1,float16,fp8,127,0.055201601982116696
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,16,128,1,float16,float16,255,0.07136800289154052
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,16,128,1,float16,fp8,255,0.07697119712829589
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,16,128,1,float16,float16,511,0.10053759813308716
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,16,128,1,float16,float16,1023,0.1642400026321411
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,16,128,1,float16,fp8,1023,0.15892319679260253
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,16,128,1,float16,float16,2047,0.28659520149230955
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,16,128,1,float16,fp8,2047,0.2513488054275513
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,16,128,1,float16,float16,4095,0.5425424098968505
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,16,128,1,float16,float16,1,0.019172799587249757
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,16,128,1,float16,fp8,4095,0.4526048183441162
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,16,128,1,float16,fp8,1,0.01990559995174408
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,16,128,1,float16,float16,3,0.018779200315475465
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,16,128,1,float16,float16,8191,1.0409487724304198
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,16,128,1,float16,fp8,3,0.02017440050840378
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,16,128,1,float16,float16,7,0.019012799859046935
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,16,128,1,float16,float16,31,0.02125120013952255
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,16,128,1,float16,fp8,8191,0.8542880058288574
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,16,128,1,float16,fp8,7,0.019944000244140624
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,16,128,1,float16,float16,15,0.019016000628471374
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,16,128,1,float16,float16,127,0.019350400567054747
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,16,128,1,float16,fp8,15,0.019592000544071196
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,16,128,1,float16,fp8,31,0.01980479955673218
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,16,128,1,float16,float16,63,0.01903360038995743
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,16,128,1,float16,fp8,63,0.021745599806308746
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,16,128,1,float16,fp8,127,0.02001439929008484
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,16,128,1,float16,float16,255,0.020868800580501556
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,16,128,1,float16,fp8,255,0.022121599316596983
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,16,128,1,float16,float16,511,0.023897600173950196
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,16,128,1,float16,fp8,511,0.025809600949287415
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,16,128,1,float16,float16,1023,0.023830400407314302
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,16,128,1,float16,fp8,1023,0.024833600223064422
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,16,128,1,float16,float16,2047,0.025212800502777098
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,16,128,1,float16,fp8,2047,0.025603199005126955
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,16,128,1,float16,float16,4095,0.02933279871940613
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,16,128,1,float16,fp8,4095,0.029292801022529603
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,16,128,1,float16,float16,8191,0.04005120098590851
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,16,128,1,float16,fp8,8191,0.03518239855766296
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,16,128,1,float16,float16,1,0.01955839991569519
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,16,128,1,float16,fp8,1,0.020187200605869295
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,16,128,1,float16,float16,3,0.019201600551605226
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,16,128,1,float16,fp8,3,0.02048960030078888
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,16,128,1,float16,float16,7,0.019412800669670105
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,16,128,1,float16,float16,15,0.01955839991569519
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,16,128,1,float16,fp8,7,0.020287999510765077
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,16,128,1,float16,fp8,15,0.02017119973897934
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,16,128,1,float16,float16,31,0.019415999948978423
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,16,128,1,float16,fp8,31,0.020369599759578704
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,16,128,1,float16,float16,63,0.019256000220775605
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,16,128,1,float16,fp8,63,0.02045920044183731
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,16,128,1,float16,float16,127,0.019417600333690645
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,16,128,1,float16,fp8,127,0.020267200469970704
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,16,128,1,float16,float16,255,0.02104160040616989
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,16,128,1,float16,fp8,255,0.022364799678325654
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,16,128,1,float16,float16,511,0.024352000653743745
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,16,128,1,float16,fp8,511,0.02574560046195984
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,16,128,1,float16,float16,1023,0.02520799934864044
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,16,128,1,float16,fp8,1023,0.025652799010276794
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,16,128,1,float16,fp8,8191,0.053420799970626834
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,16,128,1,float16,float16,1,0.032576000690460204
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,16,128,1,float16,float16,2047,0.027513599395751952
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,16,128,1,float16,fp8,2047,0.029662400484085083
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,16,128,1,float16,float16,4095,0.040375998616218566
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,16,128,1,float16,fp8,4095,0.035390400886535646
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,16,128,1,float16,float16,8191,0.05583360195159912
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,16,128,1,float16,fp8,1,0.035524800419807434
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,16,128,1,float16,float16,3,0.03235679864883423
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,16,128,1,float16,fp8,3,0.0353632003068924
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,16,128,1,float16,float16,7,0.032235199213027955
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,16,128,1,float16,fp8,7,0.035339200496673585
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,16,128,1,float16,float16,15,0.03222239911556244
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,16,128,1,float16,fp8,15,0.035262399911880495
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,16,128,1,float16,float16,31,0.03286879956722259
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,16,128,1,float16,fp8,31,0.03549599945545197
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,16,128,1,float16,float16,63,0.032235199213027955
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,16,128,1,float16,float16,511,0.06037120223045349
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,16,128,1,float16,fp8,511,0.06421759724617004
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,16,128,1,float16,fp8,63,0.03564479947090149
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,16,128,1,float16,float16,127,0.03289600014686585
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,16,128,1,float16,fp8,127,0.03564479947090149
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,16,128,1,float16,float16,255,0.04631839990615845
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,16,128,1,float16,fp8,255,0.04345119893550873
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,16,128,1,float16,float16,1023,0.09486240148544312
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,16,128,1,float16,fp8,4095,0.2418816089630127
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,16,128,1,float16,fp8,1023,0.09223999977111816
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,16,128,1,float16,float16,2047,0.1558608055114746
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,16,128,1,float16,fp8,2047,0.13998719453811645
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,16,128,1,float16,float16,4095,0.27834880352020264
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,16,128,1,float16,float16,1,0.01976799964904785
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,16,128,1,float16,fp8,1,0.02040800005197525
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,16,128,1,float16,float16,8191,0.5215104103088379
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,16,128,1,float16,float16,3,0.019827200472354888
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,16,128,1,float16,fp8,8191,0.4399104118347168
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,16,128,1,float16,fp8,3,0.02072319984436035
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,16,128,1,float16,float16,7,0.019633600115776063
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,16,128,1,float16,fp8,7,0.020449599623680113
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,16,128,1,float16,float16,15,0.019655999541282655
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,16,128,1,float16,fp8,15,0.020528000593185425
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,16,128,1,float16,float16,31,0.019623999297618867
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,16,128,1,float16,fp8,31,0.02030719965696335
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,16,128,1,float16,float16,63,0.019756799936294554
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,16,128,1,float16,fp8,63,0.020476800203323365
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,16,128,1,float16,float16,127,0.019806399941444397
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,16,128,1,float16,fp8,127,0.020664000511169435
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,16,128,1,float16,float16,255,0.021462400257587434
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,16,128,1,float16,fp8,255,0.022784000635147093
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,16,128,1,float16,float16,511,0.024607999622821806
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,16,128,1,float16,fp8,511,0.026225599646568298
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,16,128,1,float16,float16,1023,0.0272816002368927
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,16,128,1,float16,fp8,1023,0.02919520139694214
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,16,128,1,float16,float16,2047,0.04010399878025055
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,16,128,1,float16,fp8,2047,0.03584479987621307
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,16,128,1,float16,float16,4095,0.05575360059738159
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,16,128,1,float16,fp8,4095,0.0548192024230957
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,16,128,1,float16,float16,8191,0.08851839900016785
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,16,128,1,float16,fp8,8191,0.07861599922180176
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,16,128,1,float16,float16,1,0.07870240211486816
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,16,128,1,float16,fp8,1,0.09167199730873107
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,16,128,1,float16,float16,3,0.07864480018615723
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,16,128,1,float16,fp8,3,0.09190239906311035
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,16,128,1,float16,float16,7,0.07823200225830078
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,16,128,1,float16,fp8,31,0.0925487995147705
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,16,128,1,float16,fp8,7,0.09246559739112854
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,16,128,1,float16,float16,15,0.07830560207366943
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,16,128,1,float16,fp8,15,0.09208319783210754
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,16,128,1,float16,float16,31,0.07860959768295288
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,16,128,1,float16,float16,63,0.0850928008556366
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,16,128,1,float16,fp8,63,0.09263520240783692
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,16,128,1,float16,float16,127,0.08769919872283935
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,16,128,1,float16,fp8,127,0.10271199941635131
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,16,128,1,float16,float16,255,0.1166640043258667
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,16,128,1,float16,fp8,255,0.130729603767395
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,16,128,1,float16,float16,511,0.17492480278015138
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,16,128,1,float16,fp8,511,0.19093120098114014
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,16,128,1,float16,float16,1023,0.3044303894042969
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,16,128,1,float16,fp8,1023,0.29058079719543456
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,16,128,1,float16,float16,2047,0.5468944072723388
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,16,128,1,float16,float16,1,0.13765280246734618
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,16,128,1,float16,fp8,2047,0.4703519821166992
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,16,128,1,float16,fp8,1,0.1656432032585144
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,16,128,1,float16,float16,3,0.13689600229263305
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,16,128,1,float16,float16,7,0.1377392053604126
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,16,128,1,float16,fp8,3,0.1662384033203125
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,16,128,1,float16,fp8,7,0.16558719873428346
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,16,128,1,float16,float16,4095,1.063759994506836
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,16,128,1,float16,float16,15,0.13623039722442626
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,16,128,1,float16,fp8,4095,0.8744416236877441
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,16,128,1,float16,fp8,15,0.16543680429458618
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,16,128,1,float16,float16,31,0.14160159826278687
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,16,128,1,float16,fp8,31,0.16540000438690186
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,16,128,1,float16,float16,63,0.147705602645874
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,16,128,1,float16,fp8,63,0.17683520317077636
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,16,128,1,float16,float16,127,0.16899360418319703
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,16,128,1,float16,fp8,127,0.17941440343856813
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,16,128,1,float16,float16,255,0.2153343915939331
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,16,128,1,float16,fp8,255,0.2340912103652954
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,16,128,1,float16,float16,511,0.3233839988708496
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,16,128,1,float16,fp8,511,0.35593600273132325
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,16,128,1,float16,float16,1,0.2643615961074829
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,16,128,1,float16,float16,1023,0.5765999794006348
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,16,128,1,float16,fp8,1,0.30770881175994874
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,16,128,1,float16,fp8,1023,0.5479055881500244
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,16,128,1,float16,float16,3,0.2569648027420044
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,16,128,1,float16,fp8,3,0.3092207908630371
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,16,128,1,float16,float16,7,0.2576272010803223
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,16,128,1,float16,fp8,7,0.310915207862854
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,16,128,1,float16,float16,15,0.2626847982406616
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,16,128,1,float16,fp8,15,0.3125312089920044
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,16,128,1,float16,float16,31,0.2707743883132935
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,16,128,1,float16,fp8,31,0.32440800666809083
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,16,128,1,float16,float16,63,0.2761759996414185
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,16,128,1,float16,fp8,63,0.3344880104064941
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,16,128,1,float16,float16,127,0.28540959358215334
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,16,128,1,float16,fp8,127,0.34352641105651854
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,16,128,1,float16,float16,255,0.39647839069366453
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,16,128,1,float16,fp8,255,0.44823517799377444
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,16,128,1,float16,float16,1,0.020340800285339355
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,16,128,1,float16,fp8,1,0.02107519954442978
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,16,128,1,float16,float16,511,0.6192368030548095
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,16,128,1,float16,float16,3,0.020364800095558168
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,16,128,1,float16,fp8,511,0.6809311866760254
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,16,128,1,float16,fp8,3,0.021352000534534454
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,16,128,1,float16,float16,7,0.020403200387954713
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,16,128,1,float16,fp8,7,0.020948800444602966
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,16,128,1,float16,fp8,63,0.021143999695777894
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,16,128,1,float16,float16,127,0.02045599967241287
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,16,128,1,float16,float16,15,0.02021760046482086
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,16,128,1,float16,fp8,15,0.021184000372886657
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,16,128,1,float16,float16,31,0.0200655996799469
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,16,128,1,float16,fp8,31,0.020975999534130096
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,16,128,1,float16,float16,63,0.020216000080108643
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,16,128,1,float16,fp8,127,0.021089600026607515
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,16,128,1,float16,float16,255,0.022096000611782074
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,16,128,1,float16,fp8,255,0.0234607994556427
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,16,128,1,float16,float16,511,0.02523840069770813
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,16,128,1,float16,fp8,511,0.0270224004983902
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,16,128,1,float16,float16,1023,0.0399183988571167
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,16,128,1,float16,fp8,1023,0.034929600358009336
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,16,128,1,float16,float16,2047,0.054833602905273435
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,16,128,1,float16,fp8,2047,0.05425440073013306
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,16,128,1,float16,float16,4095,0.08796479701995849
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,16,128,1,float16,fp8,4095,0.07987520098686218
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,16,128,1,float16,float16,8191,0.14803680181503295
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,16,128,1,float16,fp8,8191,0.13232159614562988
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,16,128,1,float16,float16,1,0.4870319843292236
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,16,128,1,float16,fp8,1,0.5906144142150879
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,16,128,1,float16,float16,3,0.49169120788574217
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,16,128,1,float16,fp8,3,0.5899648189544677
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,16,128,1,float16,float16,7,0.4965839862823486
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,16,128,1,float16,fp8,7,0.5910943984985352
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,16,128,1,float16,float16,15,0.5126880168914795
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,16,128,1,float16,fp8,15,0.5971744060516357
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,16,128,1,float16,float16,31,0.5130127906799317
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,16,128,1,float16,fp8,31,0.6380256175994873
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,16,128,1,float16,float16,63,0.5211760044097901
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,16,128,1,float16,fp8,63,0.6391839981079102
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,16,128,1,float16,float16,127,0.5378399848937988
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,16,128,1,float16,fp8,127,0.6436831951141357
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,16,128,1,float16,float16,255,0.7533215999603271
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,16,128,1,float16,fp8,255,0.8502415657043457
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,16,128,1,float16,float16,1,0.988219165802002
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,16,128,1,float16,fp8,1,1.1552639961242677
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,16,128,1,float16,float16,3,0.9910927772521972
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,16,128,1,float16,fp8,3,1.1589072227478028
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,16,128,1,float16,float16,7,0.9922752380371094
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,16,128,1,float16,fp8,7,1.1649727821350098
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,16,128,1,float16,float16,15,0.9898688316345214
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,16,128,1,float16,fp8,15,1.2436976432800293
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,16,128,1,float16,float16,31,1.0036815643310546
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,16,128,1,float16,float16,1,0.024083200097084045
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,16,128,1,float16,float16,63,1.0147456169128417
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,16,128,1,float16,fp8,31,1.2393983840942382
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,16,128,1,float16,float16,3,0.02407519966363907
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,16,128,1,float16,fp8,1,0.025526401400566102
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,16,128,1,float16,fp8,63,1.2348912239074707
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,16,128,1,float16,fp8,3,0.025811201333999632
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,16,128,1,float16,float16,127,1.034768009185791
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,16,128,1,float16,fp8,7,0.02584159970283508
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,16,128,1,float16,float16,7,0.0239424005150795
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,16,128,1,float16,float16,15,0.024352000653743745
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,16,128,1,float16,fp8,15,0.0254831999540329
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,16,128,1,float16,float16,31,0.024033600091934205
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,16,128,1,float16,fp8,127,1.2429167747497558
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,16,128,1,float16,fp8,31,0.025788798928260803
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,16,128,1,float16,float16,63,0.024193599820137024
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,16,128,1,float16,fp8,63,0.02590720057487488
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,16,128,1,float16,float16,127,0.024401600658893585
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,16,128,1,float16,fp8,127,0.02587839961051941
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,16,128,1,float16,float16,255,0.028148800134658813
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,16,128,1,float16,fp8,255,0.029691201448440552
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,16,128,1,float16,float16,511,0.039985600113868716
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,16,128,1,float16,fp8,511,0.037478399276733396
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,16,128,1,float16,float16,1023,0.05810400247573853
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,16,128,1,float16,fp8,1023,0.05834239721298218
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,16,128,1,float16,float16,2047,0.08936960101127625
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,16,128,1,float16,fp8,2047,0.08272960186004638
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,16,128,1,float16,float16,4095,0.15127840042114257
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,16,128,1,float16,fp8,4095,0.13373759984970093
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,16,128,1,float16,float16,8191,0.2731712102890015
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,12,12,128,1,float16,float16,1,0.040227198600769044
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,12,12,128,1,float16,fp8,1,0.045633599162101746
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,16,128,1,float16,fp8,8191,0.2350208044052124
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,12,12,128,1,float16,float16,3,0.04021599888801575
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,12,12,128,1,float16,fp8,3,0.04527519941329956
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,12,12,128,1,float16,float16,7,0.0404448002576828
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,12,12,128,1,float16,fp8,7,0.04523679912090302
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,12,12,128,1,float16,fp8,63,0.04508480131626129
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,12,12,128,1,float16,float16,15,0.040428799390792844
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,12,12,128,1,float16,fp8,15,0.044947201013565065
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,12,12,128,1,float16,float16,31,0.04057919979095459
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,12,12,128,1,float16,fp8,31,0.045238399505615236
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,12,12,128,1,float16,float16,63,0.04045119881629944
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,12,12,128,1,float16,float16,127,0.046408000588417056
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,12,12,128,1,float16,fp8,127,0.04538399875164032
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,12,12,128,1,float16,fp8,1023,0.12566399574279785
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,12,12,128,1,float16,float16,255,0.06033759713172913
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,12,12,128,1,float16,fp8,255,0.062428802251815796
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,12,12,128,1,float16,float16,511,0.08125439882278443
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,12,12,128,1,float16,fp8,511,0.08615999817848205
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,12,12,128,1,float16,float16,1023,0.13073920011520385
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,12,12,128,1,float16,fp8,4095,0.34963040351867675
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,12,12,128,1,float16,float16,2047,0.2210927963256836
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,12,12,128,1,float16,fp8,2047,0.19627840518951417
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,12,12,128,1,float16,float16,4095,0.4082223892211914
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,12,12,128,1,float16,float16,1,0.019092799723148347
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,12,12,128,1,float16,fp8,1,0.019944000244140624
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,12,12,128,1,float16,float16,3,0.019116799533367156
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,12,12,128,1,float16,float16,8191,0.7891712188720703
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,12,12,128,1,float16,fp8,3,0.019939200580120088
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,12,12,128,1,float16,fp8,8191,0.6492144107818604
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,12,12,128,1,float16,float16,7,0.018598400056362152
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,12,12,128,1,float16,fp8,7,0.01972000002861023
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,12,12,128,1,float16,float16,15,0.018966400623321535
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,12,12,128,1,float16,fp8,15,0.019996799528598785
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,12,12,128,1,float16,float16,31,0.01897120028734207
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,12,12,128,1,float16,float16,63,0.019176000356674196
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,12,12,128,1,float16,fp8,31,0.019996799528598785
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,12,12,128,1,float16,fp8,63,0.0198512002825737
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,12,12,128,1,float16,float16,127,0.01910240054130554
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,12,12,128,1,float16,fp8,511,0.025443199276924133
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,12,12,128,1,float16,float16,1023,0.0242576003074646
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,12,12,128,1,float16,fp8,127,0.019888000190258028
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,12,12,128,1,float16,float16,255,0.02065120041370392
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,12,12,128,1,float16,fp8,255,0.021587200462818146
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,12,12,128,1,float16,float16,511,0.023824000358581544
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,12,12,128,1,float16,fp8,1023,0.02558560073375702
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,12,12,128,1,float16,float16,2047,0.024879999458789825
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,12,12,128,1,float16,fp8,2047,0.025787198543548585
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,12,12,128,1,float16,float16,4095,0.028071999549865723
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,12,12,128,1,float16,fp8,4095,0.02831520140171051
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,12,12,128,1,float16,float16,8191,0.03680639863014221
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,12,12,128,1,float16,fp8,8191,0.03399200141429901
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,12,12,128,1,float16,float16,1,0.01916159987449646
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,12,12,128,1,float16,fp8,1,0.02022400051355362
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,12,12,128,1,float16,float16,3,0.019203199446201323
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,12,12,128,1,float16,fp8,3,0.020190399885177613
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,12,12,128,1,float16,float16,7,0.019211199879646302
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,12,12,128,1,float16,fp8,7,0.020473599433898926
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,12,12,128,1,float16,float16,15,0.01937119960784912
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,12,12,128,1,float16,fp8,15,0.020001600682735442
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,12,12,128,1,float16,float16,31,0.01926400065422058
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,12,12,128,1,float16,fp8,31,0.0203792005777359
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,12,12,128,1,float16,float16,63,0.019443200528621675
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,12,12,128,1,float16,fp8,63,0.02009119987487793
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,12,12,128,1,float16,float16,127,0.01950400024652481
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,12,12,128,1,float16,fp8,127,0.020080000162124634
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,12,12,128,1,float16,float16,255,0.021227200329303742
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,12,12,128,1,float16,fp8,255,0.022036799788475038
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,12,12,128,1,float16,float16,511,0.024270400404930115
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,12,12,128,1,float16,fp8,511,0.02585119903087616
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,12,12,128,1,float16,float16,1023,0.02475679963827133
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,12,12,128,1,float16,fp8,1023,0.026604801416397095
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,12,12,128,1,float16,float16,2047,0.027820798754692077
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,12,12,128,1,float16,fp8,2047,0.028486400842666626
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,12,12,128,1,float16,float16,4095,0.03607200086116791
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,12,12,128,1,float16,fp8,4095,0.03426559865474701
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,12,12,128,1,float16,float16,8191,0.04941760003566742
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,12,12,128,1,float16,fp8,8191,0.050203198194503786
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,12,12,128,1,float16,float16,1,0.028734400868415833
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,12,12,128,1,float16,fp8,1,0.030593600869178773
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,12,12,128,1,float16,float16,3,0.0283376008272171
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,12,12,128,1,float16,fp8,3,0.030612799525260925
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,12,12,128,1,float16,float16,7,0.028579199314117433
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,12,12,128,1,float16,fp8,7,0.030718401074409485
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,12,12,128,1,float16,float16,15,0.028331199288368226
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,12,12,128,1,float16,fp8,15,0.030623999238014222
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,12,12,128,1,float16,float16,31,0.028808000683784484
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,12,12,128,1,float16,fp8,31,0.030726400017738343
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,12,12,128,1,float16,float16,63,0.028519999980926514
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,12,12,128,1,float16,fp8,63,0.030752000212669373
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,12,12,128,1,float16,float16,127,0.028939199447631837
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,12,12,128,1,float16,fp8,127,0.03060320019721985
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,12,12,128,1,float16,float16,255,0.03813279867172241
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,12,12,128,1,float16,fp8,255,0.03699199855327606
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,12,12,128,1,float16,float16,511,0.05007839798927307
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,12,12,128,1,float16,fp8,511,0.05322239995002746
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,12,12,128,1,float16,float16,1023,0.07645440101623535
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,12,12,128,1,float16,fp8,1023,0.0753167986869812
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,12,12,128,1,float16,float16,2047,0.12240959405899048
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,12,12,128,1,float16,fp8,2047,0.11161600351333618
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,12,12,128,1,float16,float16,4095,0.21525440216064454
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,12,12,128,1,float16,fp8,4095,0.18961119651794434
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,12,12,128,1,float16,float16,1,0.019753600656986236
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,12,12,128,1,float16,float16,8191,0.39548161029815676
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,12,12,128,1,float16,fp8,1,0.020505599677562714
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,12,12,128,1,float16,fp8,8191,0.3397631883621216
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,12,12,128,1,float16,fp8,15,0.020707200467586517
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,12,12,128,1,float16,float16,3,0.019662399590015412
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,12,12,128,1,float16,fp8,3,0.020664000511169435
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,12,12,128,1,float16,float16,7,0.01935359984636307
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,12,12,128,1,float16,fp8,7,0.02020000070333481
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,12,12,128,1,float16,float16,15,0.01987680047750473
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,12,12,128,1,float16,float16,31,0.019683200120925903
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,12,12,128,1,float16,fp8,31,0.0203792005777359
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,12,12,128,1,float16,float16,63,0.01974080055952072
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,12,12,128,1,float16,fp8,63,0.020443199574947356
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,12,12,128,1,float16,float16,127,0.01982080042362213
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,12,12,128,1,float16,fp8,127,0.020483200252056123
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,12,12,128,1,float16,float16,255,0.02136480063199997
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,12,12,128,1,float16,fp8,255,0.02276960015296936
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,12,12,128,1,float16,float16,511,0.024639999866485594
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,12,12,128,1,float16,fp8,511,0.02624480128288269
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,12,12,128,1,float16,float16,1023,0.02812960147857666
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,12,12,128,1,float16,fp8,1023,0.03298400044441223
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,12,12,128,1,float16,float16,2047,0.043680000305175784
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,12,12,128,1,float16,fp8,2047,0.040659201145172116
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,12,12,128,1,float16,float16,4095,0.06473119854927063
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,12,12,128,1,float16,fp8,4095,0.06507520079612732
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,12,12,128,1,float16,float16,8191,0.10505919456481934
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,12,12,128,1,float16,fp8,8191,0.0999455988407135
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,12,12,128,1,float16,float16,1,0.06384000182151794
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,12,12,128,1,float16,fp8,1,0.07324320077896118
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,12,12,128,1,float16,float16,3,0.06369119882583618
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,12,12,128,1,float16,fp8,3,0.07350239753723145
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,12,12,128,1,float16,float16,7,0.0639631986618042
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,12,12,128,1,float16,fp8,7,0.07418879866600037
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,12,12,128,1,float16,float16,15,0.06388000249862671
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,12,12,128,1,float16,fp8,15,0.07370560169219971
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,12,12,128,1,float16,float16,31,0.06378880143165588
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,12,12,128,1,float16,float16,255,0.09644799828529357
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,12,12,128,1,float16,fp8,31,0.07379040122032166
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,12,12,128,1,float16,float16,63,0.06880000233650208
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,12,12,128,1,float16,fp8,63,0.07377279996871948
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,12,12,128,1,float16,float16,127,0.07290080189704895
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,12,12,128,1,float16,fp8,127,0.08134080171585083
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,12,12,128,1,float16,fp8,255,0.10454879999160767
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,12,12,128,1,float16,float16,511,0.13836159706115722
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,12,12,128,1,float16,fp8,511,0.14860160350799562
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,12,12,128,1,float16,float16,1023,0.23370559215545655
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,12,12,128,1,float16,fp8,1023,0.22388958930969238
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,12,12,128,1,float16,float16,2047,0.4199215888977051
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,12,12,128,1,float16,fp8,2047,0.3659615993499756
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,12,12,128,1,float16,float16,1,0.10895359516143799
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,12,12,128,1,float16,fp8,1,0.13008960485458373
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,12,12,128,1,float16,float16,4095,0.7984943866729737
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,12,12,128,1,float16,float16,3,0.10852479934692383
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,12,12,128,1,float16,fp8,4095,0.6650928020477295
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,12,12,128,1,float16,fp8,3,0.12991520166397094
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,12,12,128,1,float16,float16,7,0.10919680595397949
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,12,12,128,1,float16,fp8,7,0.13089439868927003
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,12,12,128,1,float16,float16,15,0.10882240533828735
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,12,12,128,1,float16,fp8,15,0.13603039979934692
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,12,12,128,1,float16,float16,31,0.1106592059135437
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,12,12,128,1,float16,float16,63,0.11845279932022094
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,12,12,128,1,float16,fp8,31,0.13039040565490723
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,12,12,128,1,float16,fp8,63,0.14044320583343506
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,12,12,128,1,float16,float16,127,0.12051520347595215
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,12,12,128,1,float16,fp8,127,0.14353439807891846
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,12,12,128,1,float16,float16,255,0.16936320066452026
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,12,12,128,1,float16,fp8,255,0.18324480056762696
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,12,12,128,1,float16,float16,511,0.25168800354003906
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,12,12,128,1,float16,fp8,511,0.2755311965942383
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,12,12,128,1,float16,float16,1023,0.4438528060913086
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,12,12,128,1,float16,float16,1,0.19928159713745117
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,12,12,128,1,float16,fp8,1023,0.4227168083190918
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,12,12,128,1,float16,fp8,1,0.24300000667572022
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,12,12,128,1,float16,float16,3,0.1981376051902771
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,12,12,128,1,float16,fp8,3,0.23949599266052246
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,12,12,128,1,float16,float16,7,0.1999295949935913
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,12,12,128,1,float16,fp8,7,0.23997440338134765
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,12,12,128,1,float16,float16,15,0.20091040134429933
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,12,12,128,1,float16,fp8,63,0.2558784008026123
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,12,12,128,1,float16,fp8,15,0.2400752067565918
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,12,12,128,1,float16,float16,31,0.21261920928955078
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,12,12,128,1,float16,fp8,31,0.2423151969909668
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,12,12,128,1,float16,float16,63,0.21357920169830322
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,12,12,128,1,float16,float16,127,0.22009758949279784
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,12,12,128,1,float16,fp8,127,0.26333279609680177
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,12,12,128,1,float16,float16,255,0.3050368070602417
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,12,12,128,1,float16,fp8,255,0.3396064043045044
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,12,12,128,1,float16,float16,1,0.019713599979877473
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,12,12,128,1,float16,fp8,1,0.020729599893093108
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,12,12,128,1,float16,float16,511,0.4683407783508301
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,12,12,128,1,float16,float16,3,0.019963200390338897
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,12,12,128,1,float16,fp8,511,0.5264800071716309
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,12,12,128,1,float16,float16,31,0.020022399723529816
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,12,12,128,1,float16,fp8,3,0.020934399962425233
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,12,12,128,1,float16,float16,7,0.01993120014667511
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,12,12,128,1,float16,fp8,7,0.020707200467586517
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,12,12,128,1,float16,float16,15,0.019817599654197694
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,12,12,128,1,float16,fp8,127,0.020937600731849672
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,12,12,128,1,float16,fp8,15,0.02062560021877289
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,12,12,128,1,float16,fp8,31,0.020926399528980254
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,12,12,128,1,float16,float16,63,0.02024639993906021
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,12,12,128,1,float16,fp8,63,0.020900799334049223
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,12,12,128,1,float16,float16,127,0.02014079988002777
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,12,12,128,1,float16,float16,255,0.021961599588394165
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,12,12,128,1,float16,fp8,255,0.023024000227451324
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,12,12,128,1,float16,float16,511,0.024990400671958922
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,12,12,128,1,float16,fp8,511,0.026822400093078614
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,12,12,128,1,float16,float16,1023,0.04125120043754578
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,12,12,128,1,float16,fp8,1023,0.03956480026245117
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,12,12,128,1,float16,float16,2047,0.05594720244407654
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,12,12,128,1,float16,fp8,2047,0.05772320032119751
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,12,12,128,1,float16,float16,4095,0.08697280287742615
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,12,12,128,1,float16,fp8,4095,0.08340799808502197
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,12,12,128,1,float16,float16,8191,0.1455888032913208
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,12,12,128,1,float16,fp8,8191,0.13654719591140746
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,12,12,128,1,float16,float16,1,0.3756655931472778
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,12,12,128,1,float16,fp8,1,0.45202078819274905
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,12,12,128,1,float16,float16,3,0.3754447937011719
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,12,12,128,1,float16,fp8,3,0.4520671844482422
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,12,12,128,1,float16,float16,7,0.373089599609375
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,12,12,128,1,float16,fp8,7,0.4488815784454346
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,12,12,128,1,float16,float16,15,0.38796000480651854
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,12,12,128,1,float16,fp8,15,0.4528031826019287
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,12,12,128,1,float16,float16,127,0.41213598251342776
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,12,12,128,1,float16,float16,31,0.396777606010437
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,12,12,128,1,float16,fp8,31,0.48663840293884275
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,12,12,128,1,float16,float16,63,0.4014671802520752
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,12,12,128,1,float16,fp8,63,0.4867487907409668
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,12,12,128,1,float16,fp8,127,0.49108161926269533
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,12,12,128,1,float16,float16,255,0.5771920204162597
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,12,12,128,1,float16,fp8,255,0.6536431789398194
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,12,12,128,1,float16,float16,1,0.7259696006774903
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,12,12,128,1,float16,fp8,1,0.8715951919555665
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,12,12,128,1,float16,float16,3,0.7355311870574951
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,12,12,128,1,float16,fp8,3,0.8725168228149414
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,12,12,128,1,float16,float16,7,0.7513631820678711
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,12,12,128,1,float16,fp8,7,0.8677599906921387
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,12,12,128,1,float16,float16,15,0.7548431873321533
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,12,12,128,1,float16,float16,31,0.7573647975921631
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,12,12,128,1,float16,fp8,15,0.9145039558410645
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,12,12,128,1,float16,fp8,31,0.9329855918884278
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,12,12,128,1,float16,float16,63,0.7718239784240722
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,12,12,128,1,float16,float16,1,0.023974399268627166
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,12,12,128,1,float16,fp8,1,0.025331199169158936
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,12,12,128,1,float16,fp8,63,0.9396639823913574
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,12,12,128,1,float16,float16,3,0.023902399837970732
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,12,12,128,1,float16,float16,127,0.7960927963256836
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,12,12,128,1,float16,fp8,3,0.025355198979377748
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,12,12,128,1,float16,fp8,127,0.9438112258911133
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,12,12,128,1,float16,float16,7,0.023963199555873872
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,12,12,128,1,float16,fp8,7,0.0253711998462677
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,12,12,128,1,float16,float16,15,0.023710399866104126
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,12,12,128,1,float16,fp8,15,0.025307199358940123
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,12,12,128,1,float16,float16,31,0.023977600038051605
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,12,12,128,1,float16,fp8,31,0.025385600328445435
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,12,12,128,1,float16,float16,63,0.024001599848270418
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,12,12,128,1,float16,fp8,63,0.02550239861011505
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,12,12,128,1,float16,float16,127,0.023921599984169005
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,12,12,128,1,float16,fp8,127,0.025510400533676147
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,12,12,128,1,float16,float16,255,0.027588799595832825
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,12,12,128,1,float16,fp8,255,0.029310399293899538
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,12,12,128,1,float16,float16,2047,0.08511040210723878
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,12,12,128,1,float16,float16,511,0.03714239895343781
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,12,12,128,1,float16,fp8,511,0.036955198645591734
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,12,12,128,1,float16,float16,1023,0.05485919713973999
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,12,12,128,1,float16,fp8,1023,0.057467198371887206
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,12,12,128,1,float16,fp8,2047,0.08185120224952698
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,12,12,128,1,float16,float16,4095,0.14509279727935792
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,12,12,128,1,float16,fp8,4095,0.13532160520553588
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,12,12,128,1,float16,float16,8191,0.257096004486084
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,8,128,1,float16,float16,1,0.03265919983386993
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,12,12,128,1,float16,fp8,8191,0.23431839942932128
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,8,128,1,float16,fp8,1,0.03575200140476227
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,8,128,1,float16,float16,3,0.03264159858226776
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,8,128,1,float16,fp8,3,0.03617120087146759
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,8,128,1,float16,float16,7,0.03282879889011383
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,8,128,1,float16,fp8,7,0.03597759902477264
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,8,128,1,float16,float16,15,0.03289600014686585
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,8,128,1,float16,fp8,15,0.035838401317596434
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,8,128,1,float16,float16,31,0.032393598556518556
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,8,128,1,float16,fp8,31,0.03609279990196228
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,8,128,1,float16,float16,63,0.03300159871578216
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,8,128,1,float16,fp8,63,0.03583039939403534
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,8,128,1,float16,float16,127,0.03334720134735107
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,8,128,1,float16,fp8,127,0.03594239950180054
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,8,128,1,float16,float16,255,0.04752640128135681
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,8,128,1,float16,fp8,255,0.04405120015144348
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,8,128,1,float16,float16,511,0.06053280234336853
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,8,128,1,float16,fp8,511,0.0652671992778778
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,8,128,1,float16,float16,1023,0.09540320038795472
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,8,128,1,float16,fp8,4095,0.2420799970626831
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,8,128,1,float16,fp8,1023,0.09272000193595886
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,8,128,1,float16,float16,2047,0.157478404045105
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,8,128,1,float16,fp8,2047,0.14132800102233886
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,8,128,1,float16,float16,4095,0.27991039752960206
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,8,128,1,float16,float16,8191,0.5217296123504639
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,8,128,1,float16,float16,1,0.01892800033092499
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,8,128,1,float16,fp8,8191,0.43970561027526855
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,8,128,1,float16,fp8,1,0.019607999920845033
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,8,128,1,float16,float16,3,0.01921759992837906
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,8,128,1,float16,fp8,3,0.019392000138759614
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,8,128,1,float16,float16,7,0.018987199664115904
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,8,128,1,float16,fp8,7,0.01945119947195053
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,8,128,1,float16,float16,16383,1.0157695770263673
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,8,128,1,float16,fp8,16383,0.8306112289428711
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,8,128,1,float16,fp8,15,0.01953279972076416
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,8,128,1,float16,float16,15,0.018459199368953703
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,8,128,1,float16,float16,31,0.01918880045413971
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,8,128,1,float16,fp8,31,0.01945919990539551
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,8,128,1,float16,float16,63,0.019009600579738616
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,8,128,1,float16,float16,511,0.02343360036611557
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,8,128,1,float16,fp8,63,0.01945919990539551
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,8,128,1,float16,float16,127,0.020732800662517547
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,8,128,1,float16,float16,255,0.020520000159740447
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,8,128,1,float16,fp8,127,0.02000479996204376
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,8,128,1,float16,fp8,255,0.02181279957294464
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,8,128,1,float16,fp8,511,0.025348800420761108
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,8,128,1,float16,float16,1023,0.023020799458026885
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,8,128,1,float16,fp8,1023,0.024424000084400176
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,8,128,1,float16,float16,2047,0.023856000602245332
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,8,128,1,float16,fp8,2047,0.02544800043106079
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,8,128,1,float16,float16,4095,0.0255295991897583
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,8,128,1,float16,fp8,4095,0.02606239914894104
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,8,128,1,float16,float16,8191,0.029609599709510805
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,8,128,1,float16,fp8,8191,0.030108800530433653
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,8,128,1,float16,float16,16383,0.040724799036979675
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,8,128,1,float16,fp8,16383,0.03621760010719299
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,8,128,1,float16,float16,1,0.01923840045928955
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,8,128,1,float16,fp8,1,0.020275199413299562
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,8,128,1,float16,float16,3,0.019179199635982514
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,8,128,1,float16,fp8,3,0.020046399533748628
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,8,128,1,float16,float16,7,0.01905920058488846
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,8,128,1,float16,fp8,7,0.019860799610614776
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,8,128,1,float16,float16,15,0.019219200313091277
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,8,128,1,float16,fp8,15,0.019968000054359437
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,8,128,1,float16,float16,31,0.019227199256420135
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,8,128,1,float16,fp8,31,0.02022880017757416
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,8,128,1,float16,float16,63,0.018913599848747253
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,8,128,1,float16,fp8,63,0.01995680034160614
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,8,128,1,float16,float16,127,0.01930239945650101
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,8,128,1,float16,fp8,127,0.020025600492954255
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,8,128,1,float16,float16,255,0.02096800059080124
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,8,128,1,float16,fp8,255,0.02182080000638962
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,8,128,1,float16,float16,511,0.023982399702072145
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,8,128,1,float16,fp8,511,0.026313599944114686
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,8,128,1,float16,float16,1023,0.02361920028924942
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,8,128,1,float16,fp8,1023,0.02491839975118637
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,8,128,1,float16,float16,2047,0.0254831999540329
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,8,128,1,float16,fp8,2047,0.025649601221084596
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,8,128,1,float16,float16,4095,0.02900640070438385
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,8,128,1,float16,fp8,4095,0.029449599981307983
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,8,128,1,float16,float16,8191,0.039854401350021364
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,8,128,1,float16,fp8,8191,0.03559519946575165
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,8,128,1,float16,float16,16383,0.055980801582336426
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,8,128,1,float16,fp8,16383,0.05335360169410706
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,8,128,1,float16,float16,1,0.02470560073852539
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,8,128,1,float16,fp8,1,0.026206400990486146
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,8,128,1,float16,float16,3,0.024748800694942473
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,8,128,1,float16,fp8,3,0.026235198974609374
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,8,128,1,float16,float16,7,0.024571199715137482
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,8,128,1,float16,fp8,7,0.026012799143791197
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,8,128,1,float16,float16,63,0.024902400374412537
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,8,128,1,float16,float16,15,0.02476480007171631
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,8,128,1,float16,fp8,15,0.02619360089302063
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,8,128,1,float16,float16,31,0.024619199335575104
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,8,128,1,float16,fp8,31,0.026163199543952943
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,8,128,1,float16,fp8,63,0.02609440088272095
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,8,128,1,float16,float16,127,0.024676799774169922
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,8,128,1,float16,fp8,127,0.026339200139045716
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,8,128,1,float16,float16,255,0.028518399596214293
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,8,128,1,float16,fp8,255,0.030640000104904176
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,8,128,1,float16,float16,511,0.04009599983692169
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,8,128,1,float16,fp8,511,0.03850080072879791
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,8,128,1,float16,float16,1023,0.05872160196304321
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,8,128,1,float16,fp8,1023,0.05848960280418396
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,8,128,1,float16,float16,2047,0.09072319865226745
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,8,128,1,float16,fp8,2047,0.08272799849510193
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,8,128,1,float16,float16,4095,0.152292799949646
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,8,128,1,float16,fp8,4095,0.13481600284576417
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,8,128,1,float16,float16,8191,0.2702064037322998
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,8,128,1,float16,float16,3,0.019593599438667297
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,8,128,1,float16,fp8,8191,0.23616480827331543
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,8,128,1,float16,float16,1,0.019407999515533448
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,8,128,1,float16,fp8,1,0.020329600572586058
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,8,128,1,float16,float16,16383,0.5314095973968506
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,8,128,1,float16,fp8,16383,0.42968320846557617
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,8,128,1,float16,fp8,3,0.02022880017757416
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,8,128,1,float16,float16,7,0.01934719979763031
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,8,128,1,float16,fp8,7,0.020129600167274476
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,8,128,1,float16,float16,15,0.019443200528621675
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,8,128,1,float16,fp8,15,0.02030559927225113
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,8,128,1,float16,float16,31,0.0194815993309021
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,8,128,1,float16,fp8,31,0.02011999934911728
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,8,128,1,float16,float16,63,0.019662399590015412
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,8,128,1,float16,fp8,63,0.02019519954919815
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,8,128,1,float16,float16,127,0.019512000679969787
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,8,128,1,float16,fp8,127,0.02045920044183731
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,8,128,1,float16,float16,255,0.02128320038318634
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,8,128,1,float16,fp8,255,0.022300800681114195
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,8,128,1,float16,float16,511,0.02404160052537918
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,8,128,1,float16,fp8,511,0.025676798820495606
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,8,128,1,float16,float16,1023,0.025537601113319396
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,8,128,1,float16,fp8,1023,0.025459200143814087
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,8,128,1,float16,float16,2047,0.02739199995994568
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,8,128,1,float16,fp8,2047,0.02932159900665283
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,8,128,1,float16,float16,1,0.04848639965057373
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,8,128,1,float16,float16,4095,0.03980480134487152
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,8,128,1,float16,fp8,4095,0.035540801286697385
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,8,128,1,float16,float16,8191,0.05551360249519348
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,8,128,1,float16,fp8,8191,0.05367839932441711
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,8,128,1,float16,float16,16383,0.08834400177001953
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,8,128,1,float16,fp8,16383,0.07962080240249633
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,8,128,1,float16,fp8,1,0.055118399858474734
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,8,128,1,float16,fp8,15,0.05561439990997315
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,8,128,1,float16,float16,3,0.04856480062007904
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,8,128,1,float16,fp8,3,0.05517280101776123
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,8,128,1,float16,float16,7,0.04845919907093048
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,8,128,1,float16,fp8,7,0.05535680055618286
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,8,128,1,float16,float16,15,0.04870400130748749
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,8,128,1,float16,float16,31,0.04918240010738373
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,8,128,1,float16,fp8,31,0.055272001028060916
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,8,128,1,float16,float16,63,0.049137601256370546
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,8,128,1,float16,fp8,63,0.055129599571228025
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,8,128,1,float16,float16,127,0.0565887987613678
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,8,128,1,float16,fp8,127,0.05552800297737122
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,8,128,1,float16,float16,255,0.07286720275878907
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,8,128,1,float16,fp8,255,0.07760319709777833
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,8,128,1,float16,float16,511,0.10105600357055664
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,8,128,1,float16,fp8,511,0.10759199857711792
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,8,128,1,float16,float16,1023,0.1664191961288452
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,8,128,1,float16,fp8,1023,0.15886080265045166
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,8,128,1,float16,float16,2047,0.289521598815918
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,8,128,1,float16,fp8,2047,0.2506191968917847
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,8,128,1,float16,float16,4095,0.5382143974304199
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,8,128,1,float16,fp8,4095,0.4516751766204834
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,8,128,1,float16,float16,1,0.07997440099716187
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,8,128,1,float16,fp8,1,0.09357439875602722
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,8,128,1,float16,float16,3,0.08110880255699157
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,8,128,1,float16,fp8,3,0.09375039935111999
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,8,128,1,float16,float16,7,0.0826095998287201
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,8,128,1,float16,float16,8191,1.0440352439880372
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,8,128,1,float16,fp8,8191,0.8508128166198731
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,8,128,1,float16,fp8,7,0.09395840167999267
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,8,128,1,float16,float16,15,0.08036959767341614
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,8,128,1,float16,fp8,15,0.09409120082855224
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,8,128,1,float16,float16,31,0.08062080144882203
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,8,128,1,float16,fp8,31,0.09368159770965576
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,8,128,1,float16,float16,63,0.08865919709205627
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,8,128,1,float16,fp8,63,0.10787999629974365
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,8,128,1,float16,float16,127,0.09177759885787964
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,8,128,1,float16,fp8,127,0.10443040132522582
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,8,128,1,float16,float16,255,0.11998080015182495
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,8,128,1,float16,fp8,255,0.13244320154190065
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,8,128,1,float16,float16,511,0.17630239725112914
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,8,128,1,float16,fp8,511,0.19414399862289428
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,8,128,1,float16,float16,1023,0.30645759105682374
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,8,128,1,float16,fp8,1023,0.29072160720825196
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,8,128,1,float16,float16,1,0.1417647957801819
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,8,128,1,float16,float16,2047,0.5602015972137451
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,8,128,1,float16,fp8,1,0.16870239973068238
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,8,128,1,float16,float16,3,0.1422287940979004
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,8,128,1,float16,fp8,2047,0.48108959197998047
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,8,128,1,float16,fp8,3,0.17106560468673707
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,8,128,1,float16,float16,7,0.14124959707260132
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,8,128,1,float16,fp8,7,0.17015839815139772
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,8,128,1,float16,float16,15,0.14078400135040284
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,8,128,1,float16,fp8,15,0.16906720399856567
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,8,128,1,float16,float16,31,0.14933760166168214
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,8,128,1,float16,fp8,31,0.16974079608917236
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,8,128,1,float16,float16,63,0.1530511975288391
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,8,128,1,float16,fp8,63,0.1816431999206543
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,8,128,1,float16,float16,127,0.15708639621734619
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,8,128,1,float16,fp8,127,0.18474080562591552
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,8,128,1,float16,float16,255,0.2123136043548584
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,8,128,1,float16,fp8,255,0.2419600009918213
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,8,128,1,float16,float16,511,0.3232367992401123
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,8,128,1,float16,fp8,511,0.3639136075973511
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,8,128,1,float16,float16,1,0.019687999784946442
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,8,128,1,float16,fp8,1,0.020572799444198608
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,8,128,1,float16,float16,1023,0.5806416034698486
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,8,128,1,float16,float16,3,0.019918400049209594
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,8,128,1,float16,fp8,1023,0.5568336009979248
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,8,128,1,float16,fp8,3,0.020710399746894835
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,8,128,1,float16,float16,7,0.019832000136375427
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,8,128,1,float16,fp8,7,0.020817600190639496
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,8,128,1,float16,float16,15,0.01998240053653717
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,8,128,1,float16,fp8,15,0.020499199628829956
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,8,128,1,float16,float16,31,0.02001280039548874
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,8,128,1,float16,fp8,31,0.020791999995708466
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,8,128,1,float16,float16,63,0.019721600413322448
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,8,128,1,float16,fp8,63,0.02069759964942932
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,8,128,1,float16,float16,127,0.019952000677585603
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,8,128,1,float16,fp8,127,0.02070080041885376
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,8,128,1,float16,float16,255,0.02176000028848648
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,8,128,1,float16,fp8,255,0.022886399924755097
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,8,128,1,float16,float16,511,0.025116801261901855
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,8,128,1,float16,fp8,511,0.02643040120601654
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,8,128,1,float16,float16,1023,0.027423998713493346
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,8,128,1,float16,fp8,1023,0.029449599981307983
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,8,128,1,float16,float16,2047,0.04094879925251007
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,8,128,1,float16,fp8,2047,0.03603839874267578
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,8,128,1,float16,float16,4095,0.056655997037887575
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,8,128,1,float16,fp8,4095,0.05434240102767944
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,8,128,1,float16,float16,8191,0.08942559957504273
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,8,128,1,float16,fp8,8191,0.08000320196151733
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,8,128,1,float16,float16,16383,0.1487712025642395
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,8,128,1,float16,fp8,16383,0.13125439882278442
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,8,128,1,float16,float16,1,0.25946879386901855
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,8,128,1,float16,fp8,1,0.3102911949157715
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,8,128,1,float16,float16,3,0.2569535970687866
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,8,128,1,float16,fp8,3,0.31186399459838865
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,8,128,1,float16,float16,7,0.2592400074005127
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,8,128,1,float16,fp8,7,0.31215999126434324
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,8,128,1,float16,float16,15,0.2626255989074707
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,8,128,1,float16,fp8,15,0.3121056079864502
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,8,128,1,float16,float16,31,0.2759968042373657
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,8,128,1,float16,fp8,31,0.3251231908798218
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,8,128,1,float16,float16,63,0.2770927906036377
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,8,128,1,float16,fp8,63,0.3359503984451294
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,8,128,1,float16,float16,127,0.2885296106338501
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,8,128,1,float16,fp8,127,0.3413072109222412
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,8,128,1,float16,float16,255,0.39572958946228026
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,8,128,1,float16,fp8,255,0.44766721725463865
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,8,128,1,float16,float16,511,0.6260240077972412
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,8,128,1,float16,float16,1,0.4884175777435303
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,8,128,1,float16,fp8,1,0.5914415836334228
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,8,128,1,float16,fp8,511,0.6823728084564209
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,8,128,1,float16,float16,3,0.49224319458007815
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,8,128,1,float16,fp8,3,0.5913407802581787
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,8,128,1,float16,float16,7,0.495195198059082
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,8,128,1,float16,fp8,7,0.5921807765960694
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,8,128,1,float16,float16,15,0.5195615768432618
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,8,128,1,float16,fp8,15,0.6003136157989502
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,8,128,1,float16,float16,31,0.5189487934112549
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,8,128,1,float16,fp8,31,0.6335968017578125
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,8,128,1,float16,float16,63,0.5245007991790771
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,8,128,1,float16,fp8,63,0.6373551845550537
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,8,128,1,float16,float16,127,0.5379024028778077
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,8,128,1,float16,fp8,127,0.644385576248169
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,8,128,1,float16,float16,1,0.02034880071878433
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,8,128,1,float16,fp8,1,0.020839999616146087
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,8,128,1,float16,float16,255,0.7630127906799317
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,8,128,1,float16,float16,3,0.02034880071878433
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,8,128,1,float16,fp8,3,0.021096000075340272
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,8,128,1,float16,fp8,255,0.8511584281921387
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,8,128,1,float16,float16,7,0.020164799690246583
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,8,128,1,float16,fp8,7,0.021132799983024596
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,8,128,1,float16,float16,15,0.02029920071363449
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,8,128,1,float16,fp8,15,0.021134400367736818
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,8,128,1,float16,float16,31,0.020403200387954713
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,8,128,1,float16,fp8,31,0.02133920043706894
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,8,128,1,float16,float16,63,0.020449599623680113
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,8,128,1,float16,fp8,63,0.021115200221538545
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,8,128,1,float16,float16,127,0.02051520049571991
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,8,128,1,float16,fp8,511,0.02706719934940338
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,8,128,1,float16,fp8,255,0.023537600040435792
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,8,128,1,float16,float16,255,0.022191999852657317
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,8,128,1,float16,fp8,127,0.02110559940338135
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,8,128,1,float16,float16,511,0.025486400723457335
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,8,128,1,float16,float16,1023,0.03972159922122955
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,8,128,1,float16,fp8,1023,0.03468480110168457
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,8,128,1,float16,float16,2047,0.055276799201965335
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,8,128,1,float16,fp8,2047,0.05429919958114624
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,8,128,1,float16,float16,4095,0.08835840225219727
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,8,128,1,float16,fp8,4095,0.07902079820632935
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,8,128,1,float16,float16,8191,0.15053600072860718
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,8,128,1,float16,fp8,8191,0.1309712052345276
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,4,128,1,float16,float16,1,0.02550880014896393
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,8,128,1,float16,fp8,16383,0.23044159412384033
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,8,128,1,float16,float16,16383,0.2693455934524536
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,4,128,1,float16,fp8,1,0.026825600862503053
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,4,128,1,float16,float16,3,0.024963200092315674
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,4,128,1,float16,fp8,3,0.026715201139450074
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,4,128,1,float16,float16,7,0.02499839961528778
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,4,128,1,float16,fp8,7,0.026913601160049438
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,4,128,1,float16,float16,15,0.025467199087142945
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,4,128,1,float16,fp8,15,0.026748800277709962
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,4,128,1,float16,float16,31,0.02510400116443634
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,4,128,1,float16,fp8,31,0.02669920027256012
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,4,128,1,float16,float16,63,0.025190401077270507
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,4,128,1,float16,fp8,63,0.026940798759460448
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,4,128,1,float16,float16,511,0.04140959978103638
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,4,128,1,float16,float16,127,0.025089600682258607
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,4,128,1,float16,fp8,127,0.02715199887752533
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,4,128,1,float16,float16,255,0.02853280007839203
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,4,128,1,float16,fp8,255,0.030771198868751525
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,4,128,1,float16,fp8,2047,0.08330240249633789
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,4,128,1,float16,fp8,511,0.038689601421356204
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,4,128,1,float16,float16,1023,0.05925920009613037
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,4,128,1,float16,fp8,4095,0.1352944016456604
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,4,128,1,float16,fp8,1023,0.05948479771614075
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,4,128,1,float16,float16,2047,0.09257919788360595
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,4,128,1,float16,float16,4095,0.15312960147857665
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,4,128,1,float16,fp8,8191,0.2350208044052124
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,4,128,1,float16,float16,8191,0.274835205078125
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,4,128,1,float16,float16,16383,0.5239744186401367
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,4,128,1,float16,float16,1,0.018795199692249298
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,4,128,1,float16,fp8,16383,0.429366397857666
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,4,128,1,float16,fp8,1,0.019623999297618867
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,4,128,1,float16,float16,3,0.018796800076961516
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,4,128,1,float16,float16,7,0.01889439970254898
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,4,128,1,float16,fp8,7,0.019657599925994872
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,4,128,1,float16,fp8,3,0.019729599356651306
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,4,128,1,float16,float16,32767,1.043723201751709
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,4,128,1,float16,fp8,32767,0.8200240135192871
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,4,128,1,float16,fp8,15,0.019657599925994872
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,4,128,1,float16,float16,127,0.020937600731849672
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,4,128,1,float16,float16,15,0.018644799292087556
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,4,128,1,float16,float16,31,0.018884800374507904
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,4,128,1,float16,fp8,31,0.019435200095176696
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,4,128,1,float16,float16,63,0.018726399540901183
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,4,128,1,float16,fp8,63,0.019593599438667297
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,4,128,1,float16,fp8,127,0.019726400077342988
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,4,128,1,float16,float16,255,0.0201664000749588
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,4,128,1,float16,fp8,255,0.022495999932289124
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,4,128,1,float16,float16,511,0.0233024001121521
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,4,128,1,float16,fp8,511,0.025521600246429445
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,4,128,1,float16,float16,1023,0.024135999381542206
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,4,128,1,float16,fp8,1023,0.02531520128250122
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,4,128,1,float16,float16,2047,0.02428800016641617
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,4,128,1,float16,fp8,2047,0.02550719976425171
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,4,128,1,float16,float16,4095,0.02436159998178482
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,4,128,1,float16,fp8,4095,0.026791998744010927
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,4,128,1,float16,float16,8191,0.026318401098251343
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,4,128,1,float16,fp8,8191,0.027188798785209654
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,4,128,1,float16,float16,16383,0.029977598786354066
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,4,128,1,float16,fp8,16383,0.03062080144882202
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,4,128,1,float16,float16,32767,0.041607999801635744
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,4,128,1,float16,fp8,32767,0.03825120031833649
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,4,128,1,float16,float16,1,0.01886080056428909
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,4,128,1,float16,fp8,1,0.01998240053653717
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,4,128,1,float16,float16,3,0.01902720034122467
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,4,128,1,float16,fp8,3,0.01985599994659424
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,4,128,1,float16,float16,7,0.018862399458885192
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,4,128,1,float16,fp8,7,0.019808000326156615
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,4,128,1,float16,float16,15,0.018785600364208222
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,4,128,1,float16,fp8,15,0.019974400103092194
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,4,128,1,float16,float16,31,0.018830400705337525
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,4,128,1,float16,fp8,31,0.019980800151824952
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,4,128,1,float16,float16,63,0.019126400351524353
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,4,128,1,float16,fp8,63,0.019844800233840942
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,4,128,1,float16,float16,127,0.01926079988479614
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,4,128,1,float16,fp8,127,0.019675199687480927
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,4,128,1,float16,float16,255,0.020715199410915375
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,4,128,1,float16,fp8,255,0.021910400688648225
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,4,128,1,float16,float16,511,0.023393599689006804
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,4,128,1,float16,fp8,511,0.025681599974632263
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,4,128,1,float16,float16,1023,0.02465440034866333
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,4,128,1,float16,fp8,1023,0.025947201251983642
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,4,128,1,float16,float16,2047,0.024817599356174468
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,4,128,1,float16,fp8,2047,0.025900799036026
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,4,128,1,float16,float16,4095,0.026395198702812196
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,4,128,1,float16,fp8,4095,0.02659519910812378
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,4,128,1,float16,float16,8191,0.02998720109462738
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,4,128,1,float16,fp8,8191,0.03083199858665466
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,4,128,1,float16,float16,16383,0.04169760048389435
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,4,128,1,float16,fp8,16383,0.03635680079460144
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,4,128,1,float16,float16,32767,0.057918399572372437
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,4,128,1,float16,fp8,32767,0.05629119873046875
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,4,128,1,float16,float16,1,0.020491200685501098
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,4,128,1,float16,fp8,1,0.02168159931898117
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,4,128,1,float16,float16,3,0.020623999834060668
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,4,128,1,float16,fp8,3,0.021316799521446227
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,4,128,1,float16,float16,7,0.020553599298000335
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,4,128,1,float16,fp8,7,0.021768000721931458
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,4,128,1,float16,float16,15,0.020535999536514284
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,4,128,1,float16,fp8,15,0.02146719992160797
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,4,128,1,float16,float16,31,0.020500800013542174
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,4,128,1,float16,fp8,31,0.021694399416446686
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,4,128,1,float16,float16,63,0.02080159932374954
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,4,128,1,float16,fp8,63,0.021615999937057494
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,4,128,1,float16,float16,127,0.020771199464797975
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,4,128,1,float16,fp8,127,0.021777600049972534
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,4,128,1,float16,float16,255,0.022404800355434417
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,4,128,1,float16,fp8,255,0.02356960028409958
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,4,128,1,float16,float16,2047,0.05583680272102356
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,4,128,1,float16,float16,511,0.025836798548698425
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,4,128,1,float16,fp8,511,0.027294400334358215
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,4,128,1,float16,float16,1023,0.03967840075492859
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,4,128,1,float16,fp8,8191,0.1309167981147766
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,4,128,1,float16,fp8,1023,0.03543039858341217
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,4,128,1,float16,fp8,2047,0.05392959713935852
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,4,128,1,float16,float16,4095,0.08888800144195556
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,4,128,1,float16,fp8,4095,0.0802079975605011
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,4,128,1,float16,float16,8191,0.149127995967865
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,4,128,1,float16,float16,16383,0.2737679958343506
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,4,128,1,float16,fp8,16383,0.2303231954574585
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,4,128,1,float16,float16,1,0.018972800672054292
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,4,128,1,float16,fp8,1,0.020207999646663664
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,4,128,1,float16,float16,32767,0.5155360221862793
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,4,128,1,float16,float16,3,0.019124799966812135
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,4,128,1,float16,fp8,32767,0.4256127834320068
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,4,128,1,float16,fp8,3,0.0203247994184494
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,4,128,1,float16,float16,7,0.019398400187492372
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,4,128,1,float16,fp8,7,0.01972319930791855
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,4,128,1,float16,float16,15,0.018934400379657747
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,4,128,1,float16,fp8,15,0.020017600059509276
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,4,128,1,float16,float16,31,0.01905920058488846
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,4,128,1,float16,fp8,31,0.02014880031347275
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,4,128,1,float16,float16,63,0.019201600551605226
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,4,128,1,float16,fp8,63,0.019857600331306458
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,4,128,1,float16,float16,127,0.01945919990539551
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,4,128,1,float16,fp8,127,0.020124800503253937
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,4,128,1,float16,float16,255,0.020916800200939178
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,4,128,1,float16,fp8,255,0.022121599316596983
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,4,128,1,float16,float16,511,0.023992000520229338
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,4,128,1,float16,fp8,511,0.025755199790000915
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,4,128,1,float16,float16,1023,0.0245728000998497
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,4,128,1,float16,fp8,1023,0.025655999779701233
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,4,128,1,float16,float16,2047,0.026228800415992737
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,4,128,1,float16,float16,16383,0.05670560002326965
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,4,128,1,float16,fp8,2047,0.02635039985179901
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,4,128,1,float16,float16,4095,0.029872000217437744
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,4,128,1,float16,fp8,4095,0.030347201228141784
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,4,128,1,float16,float16,8191,0.04088160097599029
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,4,128,1,float16,fp8,8191,0.036473599076271054
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,4,128,1,float16,fp8,16383,0.05450720191001892
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,4,128,1,float16,float16,32767,0.08990560173988342
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,4,128,1,float16,fp8,32767,0.07989919781684876
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,4,128,1,float16,float16,1,0.03415839970111847
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,4,128,1,float16,fp8,1,0.037390398979187014
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,4,128,1,float16,float16,3,0.03428960144519806
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,4,128,1,float16,fp8,3,0.03760800063610077
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,4,128,1,float16,float16,7,0.034108799695968625
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,4,128,1,float16,fp8,7,0.03734399974346161
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,4,128,1,float16,float16,15,0.03463680148124695
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,4,128,1,float16,fp8,15,0.037540799379348753
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,4,128,1,float16,float16,31,0.03426559865474701
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,4,128,1,float16,fp8,31,0.037436801195144656
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,4,128,1,float16,float16,63,0.03434720039367676
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,4,128,1,float16,fp8,63,0.03772799968719483
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,4,128,1,float16,float16,127,0.03556160032749176
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,4,128,1,float16,float16,1023,0.09764800071716309
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,4,128,1,float16,fp8,127,0.03758879899978638
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,4,128,1,float16,float16,255,0.049472001194953916
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,4,128,1,float16,fp8,255,0.045444801449775696
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,4,128,1,float16,float16,511,0.06354560256004334
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,4,128,1,float16,fp8,511,0.06614879965782165
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,4,128,1,float16,fp8,1023,0.09413440227508545
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,4,128,1,float16,float16,2047,0.1593999981880188
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,4,128,1,float16,fp8,2047,0.14188799858093262
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,4,128,1,float16,float16,4095,0.2800784111022949
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,4,128,1,float16,fp8,4095,0.24232800006866456
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,4,128,1,float16,float16,8191,0.5315104007720948
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,4,128,1,float16,fp8,8191,0.44244799613952634
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,4,128,1,float16,float16,1,0.051635199785232545
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,4,128,1,float16,fp8,1,0.05788159966468811
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,4,128,1,float16,float16,3,0.05161759853363037
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,4,128,1,float16,fp8,3,0.05829600095748901
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,4,128,1,float16,float16,7,0.05168160200119019
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,4,128,1,float16,float16,16383,1.0296607971191407
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,4,128,1,float16,fp8,7,0.058139199018478395
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,4,128,1,float16,fp8,16383,0.8326576232910157
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,4,128,1,float16,float16,15,0.05173119902610779
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,4,128,1,float16,fp8,15,0.05774239897727966
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,4,128,1,float16,float16,31,0.05191680192947388
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,4,128,1,float16,fp8,31,0.05824959874153137
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,4,128,1,float16,float16,63,0.059747201204299924
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,4,128,1,float16,fp8,63,0.0581167995929718
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,4,128,1,float16,float16,127,0.06062560081481934
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,4,128,1,float16,fp8,127,0.0604095995426178
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,4,128,1,float16,float16,255,0.0754256010055542
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,4,128,1,float16,fp8,255,0.08011680245399475
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,4,128,1,float16,float16,511,0.1057136058807373
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,4,128,1,float16,fp8,511,0.11269279718399047
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,4,128,1,float16,float16,1023,0.17106399536132813
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,4,128,1,float16,fp8,1023,0.16404639482498168
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,4,128,1,float16,float16,2047,0.2906352043151855
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,4,128,1,float16,float16,4095,0.545521593093872
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,4,128,1,float16,fp8,2047,0.25399041175842285
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,4,128,1,float16,float16,1,0.08731359839439393
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,4,128,1,float16,fp8,1,0.09959040284156799
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,4,128,1,float16,float16,3,0.08695200085639954
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,4,128,1,float16,fp8,4095,0.45430560111999513
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,4,128,1,float16,fp8,3,0.09972959756851196
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,4,128,1,float16,float16,7,0.0870959997177124
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,4,128,1,float16,fp8,7,0.09943839907646179
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,4,128,1,float16,float16,15,0.0875440001487732
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,4,128,1,float16,fp8,15,0.0998207986354828
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,4,128,1,float16,float16,31,0.08686559796333312
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,4,128,1,float16,fp8,31,0.09997119903564453
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,4,128,1,float16,float16,63,0.0944320023059845
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,4,128,1,float16,fp8,63,0.10207840204238891
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,4,128,1,float16,float16,127,0.09500960111618043
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,4,128,1,float16,fp8,127,0.10847519636154175
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,4,128,1,float16,float16,1023,0.3130543947219849
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,4,128,1,float16,float16,255,0.12881920337677003
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,4,128,1,float16,fp8,255,0.13791999816894532
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,4,128,1,float16,float16,511,0.18338719606399537
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,4,128,1,float16,fp8,511,0.2004528045654297
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,4,128,1,float16,fp8,1023,0.29883360862731934
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,4,128,1,float16,float16,1,0.019732800126075745
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,4,128,1,float16,fp8,1,0.020388799905776977
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,4,128,1,float16,float16,2047,0.5648159980773926
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,4,128,1,float16,float16,3,0.019852800667285918
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,4,128,1,float16,fp8,2047,0.48038558959960936
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,4,128,1,float16,fp8,3,0.02035840004682541
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,4,128,1,float16,float16,7,0.019752000272274018
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,4,128,1,float16,fp8,7,0.02054239958524704
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,4,128,1,float16,float16,15,0.01966399997472763
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,4,128,1,float16,fp8,15,0.02054080069065094
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,4,128,1,float16,float16,31,0.019596800208091736
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,4,128,1,float16,fp8,31,0.020326399803161622
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,4,128,1,float16,float16,63,0.019495999813079833
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,4,128,1,float16,fp8,63,0.02041600048542023
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,4,128,1,float16,float16,127,0.019566400349140166
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,4,128,1,float16,fp8,127,0.020436799526214598
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,4,128,1,float16,float16,255,0.021166400611400606
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,4,128,1,float16,fp8,255,0.02263839989900589
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,4,128,1,float16,float16,511,0.024432000517845155
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,4,128,1,float16,fp8,511,0.02592639923095703
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,4,128,1,float16,float16,1023,0.026705598831176756
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,4,128,1,float16,fp8,1023,0.026888000965118408
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,4,128,1,float16,float16,2047,0.02847360074520111
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,4,128,1,float16,fp8,2047,0.03041439950466156
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,4,128,1,float16,float16,4095,0.04146240055561066
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,4,128,1,float16,fp8,4095,0.036371201276779175
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,4,128,1,float16,float16,8191,0.05713919997215271
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,4,128,1,float16,fp8,8191,0.05459840297698974
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,4,128,1,float16,float16,16383,0.09037280082702637
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,4,128,1,float16,fp8,16383,0.08019199967384338
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,4,128,1,float16,float16,32767,0.15171040296554567
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,4,128,1,float16,fp8,32767,0.13173439502716064
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,4,128,1,float16,float16,1,0.15023839473724365
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,4,128,1,float16,fp8,1,0.17473759651184081
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,4,128,1,float16,float16,3,0.18519519567489623
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,4,128,1,float16,fp8,3,0.17411999702453612
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,4,128,1,float16,float16,7,0.1491055965423584
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,4,128,1,float16,fp8,7,0.1750175952911377
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,4,128,1,float16,float16,15,0.1505743980407715
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,4,128,1,float16,fp8,15,0.17484960556030274
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,4,128,1,float16,float16,31,0.15392160415649414
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,4,128,1,float16,fp8,31,0.1750432014465332
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,4,128,1,float16,float16,63,0.15851680040359498
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,4,128,1,float16,fp8,63,0.18619840145111083
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,4,128,1,float16,float16,127,0.159660804271698
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,4,128,1,float16,fp8,127,0.19046080112457275
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,4,128,1,float16,float16,255,0.22106719017028809
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,4,128,1,float16,fp8,255,0.24514880180358886
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,4,128,1,float16,float16,511,0.33179678916931155
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,4,128,1,float16,fp8,511,0.3674256086349487
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,4,128,1,float16,float16,1,0.27364959716796877
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,4,128,1,float16,float16,1023,0.5897984027862548
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,4,128,1,float16,fp8,1,0.3255232095718384
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,4,128,1,float16,fp8,1023,0.5584080219268799
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,4,128,1,float16,float16,3,0.2761039972305298
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,4,128,1,float16,fp8,3,0.32382080554962156
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,4,128,1,float16,float16,7,0.27596158981323243
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,4,128,1,float16,fp8,7,0.3245503902435303
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,4,128,1,float16,float16,15,0.27593920230865476
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,4,128,1,float16,fp8,15,0.32417280673980714
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,4,128,1,float16,float16,31,0.2852992057800293
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,4,128,1,float16,fp8,31,0.3377295970916748
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,4,128,1,float16,float16,63,0.2879024028778076
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,4,128,1,float16,fp8,63,0.3472543954849243
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,4,128,1,float16,float16,127,0.29264800548553466
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,4,128,1,float16,fp8,127,0.3481904029846191
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,4,128,1,float16,float16,255,0.402510404586792
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,4,128,1,float16,fp8,255,0.4599008083343506
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,4,128,1,float16,float16,1,0.01979999989271164
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,4,128,1,float16,fp8,1,0.020819200575351714
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,4,128,1,float16,fp8,7,0.02064799964427948
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,4,128,1,float16,float16,511,0.6308447837829589
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,4,128,1,float16,float16,3,0.019942399859428406
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,4,128,1,float16,fp8,511,0.6990560054779053
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,4,128,1,float16,fp8,3,0.020902399718761445
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,4,128,1,float16,float16,7,0.019726400077342988
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,4,128,1,float16,float16,15,0.019937600195407867
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,4,128,1,float16,fp8,15,0.020614400506019592
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,4,128,1,float16,float16,31,0.019976000487804412
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,4,128,1,float16,fp8,31,0.02080480009317398
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,4,128,1,float16,float16,63,0.019814400374889372
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,4,128,1,float16,fp8,63,0.020609599351882935
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,4,128,1,float16,float16,127,0.01973759979009628
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,4,128,1,float16,fp8,127,0.02111999988555908
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,4,128,1,float16,float16,255,0.021689599752426146
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,4,128,1,float16,fp8,255,0.022776000201702118
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,4,128,1,float16,float16,511,0.024784000217914583
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,4,128,1,float16,fp8,511,0.026617598533630372
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,4,128,1,float16,float16,1023,0.028649601340293884
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,4,128,1,float16,fp8,1023,0.030511999130249025
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,4,128,1,float16,fp8,2047,0.03727520108222961
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,4,128,1,float16,float16,2047,0.041407999396324155
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,4,128,1,float16,float16,4095,0.0578607976436615
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,4,128,1,float16,fp8,4095,0.05597119927406311
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,4,128,1,float16,float16,8191,0.09079520106315613
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,4,128,1,float16,fp8,8191,0.08068000078201294
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,4,128,1,float16,float16,16383,0.1521056056022644
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,4,128,1,float16,fp8,16383,0.13288480043411255
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,4,128,1,float16,float16,32767,0.27294719219207764
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,2,128,1,float16,float16,1,0.02125760018825531
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,4,128,1,float16,fp8,32767,0.23169119358062745
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,2,128,1,float16,fp8,1,0.022099199891090392
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,2,128,1,float16,float16,3,0.021423999965190888
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,2,128,1,float16,fp8,3,0.022121599316596983
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,2,128,1,float16,float16,7,0.021447999775409697
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,2,128,1,float16,fp8,7,0.022193600237369538
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,2,128,1,float16,float16,15,0.021185599267482758
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,2,128,1,float16,fp8,15,0.022097599506378175
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,2,128,1,float16,float16,31,0.021358400583267212
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,2,128,1,float16,fp8,31,0.021987199783325195
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,2,128,1,float16,float16,63,0.021484799683094025
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,2,128,1,float16,fp8,63,0.02247200012207031
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,2,128,1,float16,float16,127,0.0216048002243042
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,2,128,1,float16,fp8,127,0.02232639938592911
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,2,128,1,float16,float16,255,0.02336000055074692
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,2,128,1,float16,fp8,255,0.024054400622844696
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,2,128,1,float16,float16,511,0.026265600323677064
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,2,128,1,float16,fp8,511,0.028163200616836546
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,2,128,1,float16,float16,1023,0.04115839898586273
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,2,128,1,float16,fp8,1023,0.03761439919471741
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,2,128,1,float16,float16,2047,0.05737599730491638
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,2,128,1,float16,fp8,2047,0.05457280278205871
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,2,128,1,float16,float16,4095,0.09012320041656494
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,2,128,1,float16,fp8,4095,0.07970240116119384
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,2,128,1,float16,float16,8191,0.15232319831848146
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,2,128,1,float16,fp8,8191,0.13190399408340453
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,2,128,1,float16,float16,16383,0.27319040298461916
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,2,128,1,float16,fp8,16383,0.22909920215606688
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,2,128,1,float16,float16,32767,0.5254799842834472
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,2,128,1,float16,float16,1,0.01695999950170517
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,2,128,1,float16,fp8,32767,0.42388157844543456
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,2,128,1,float16,fp8,1,0.017990399897098542
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,2,128,1,float16,float16,3,0.017187200486660004
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,2,128,1,float16,fp8,3,0.01785759925842285
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,2,128,1,float16,float16,7,0.017182399332523347
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,2,128,1,float16,fp8,7,0.017665599286556245
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,2,128,1,float16,float16,65535,0.991214370727539
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,2,128,1,float16,fp8,65535,0.8160991668701172
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,2,128,1,float16,float16,15,0.01695840060710907
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,2,128,1,float16,fp8,15,0.01823360025882721
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,2,128,1,float16,float16,31,0.017108799517154695
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,2,128,1,float16,fp8,31,0.018059200048446654
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,2,128,1,float16,float16,63,0.016974399983882903
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,2,128,1,float16,fp8,63,0.01785600036382675
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,2,128,1,float16,float16,127,0.01744000017642975
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,2,128,1,float16,fp8,127,0.018036800622940063
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,2,128,1,float16,float16,255,0.018617600202560425
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,2,128,1,float16,fp8,255,0.019840000569820403
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,2,128,1,float16,float16,511,0.02157920002937317
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,2,128,1,float16,fp8,511,0.02396479994058609
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,2,128,1,float16,float16,1023,0.022334399819374084
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,2,128,1,float16,fp8,1023,0.023654399812221526
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,2,128,1,float16,float16,2047,0.022620800137519836
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,2,128,1,float16,fp8,2047,0.023713600635528565
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,2,128,1,float16,float16,4095,0.022779199481010436
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,2,128,1,float16,fp8,4095,0.024376000463962554
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,2,128,1,float16,float16,8191,0.024303999543190003
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,2,128,1,float16,fp8,8191,0.02606239914894104
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,2,128,1,float16,float16,16383,0.026364800333976746
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,2,128,1,float16,fp8,16383,0.02696320116519928
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,2,128,1,float16,float16,32767,0.030070400238037108
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,2,128,1,float16,fp8,32767,0.03071039915084839
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,2,128,1,float16,float16,65535,0.04156799912452698
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,2,128,1,float16,fp8,65535,0.038780799508094786
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,2,128,1,float16,float16,1,0.0189968004822731
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,2,128,1,float16,fp8,1,0.020075200498104094
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,2,128,1,float16,float16,3,0.01870719939470291
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,2,128,1,float16,fp8,3,0.019753600656986236
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,2,128,1,float16,float16,7,0.018721599876880646
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,2,128,1,float16,fp8,7,0.019823999702930452
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,2,128,1,float16,float16,15,0.019009600579738616
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,2,128,1,float16,fp8,15,0.019683200120925903
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,2,128,1,float16,float16,31,0.018836799263954162
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,2,128,1,float16,fp8,31,0.019908800721168518
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,2,128,1,float16,float16,63,0.0191103994846344
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,2,128,1,float16,fp8,63,0.019841599464416503
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,2,128,1,float16,float16,127,0.018824000656604768
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,2,128,1,float16,fp8,127,0.019721600413322448
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,2,128,1,float16,float16,255,0.020351999998092653
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,2,128,1,float16,fp8,255,0.021993599832057953
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,2,128,1,float16,float16,511,0.02375199943780899
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,2,128,1,float16,fp8,511,0.02563999891281128
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,2,128,1,float16,float16,1023,0.024622400104999543
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,2,128,1,float16,fp8,1023,0.025464001297950744
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,2,128,1,float16,float16,2047,0.024775999784469604
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,2,128,1,float16,fp8,2047,0.026284798979759216
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,2,128,1,float16,float16,4095,0.024795199930667877
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,2,128,1,float16,fp8,4095,0.026240000128746034
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,2,128,1,float16,float16,8191,0.028420799970626832
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,2,128,1,float16,float16,65535,0.05915840268135071
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,2,128,1,float16,fp8,8191,0.028401601314544677
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,2,128,1,float16,float16,16383,0.03161279857158661
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,2,128,1,float16,fp8,16383,0.03192160129547119
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,2,128,1,float16,float16,32767,0.0435808002948761
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,2,128,1,float16,fp8,32767,0.03875199854373932
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,2,128,1,float16,fp8,65535,0.058396798372268674
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,2,128,1,float16,float16,1,0.020360000431537628
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,2,128,1,float16,fp8,1,0.021201600134372712
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,2,128,1,float16,float16,3,0.020052799582481386
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,2,128,1,float16,fp8,3,0.021206399798393248
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,2,128,1,float16,float16,7,0.020267200469970704
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,2,128,1,float16,fp8,7,0.02142080068588257
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,2,128,1,float16,float16,15,0.020187200605869295
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,2,128,1,float16,fp8,15,0.021320000290870667
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,2,128,1,float16,float16,31,0.020396800339221956
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,2,128,1,float16,fp8,31,0.0213919997215271
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,2,128,1,float16,float16,63,0.02030719965696335
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,2,128,1,float16,fp8,63,0.021347199380397797
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,2,128,1,float16,float16,127,0.020529599487781526
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,2,128,1,float16,fp8,127,0.021223999559879303
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,2,128,1,float16,float16,255,0.022307200729846953
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,2,128,1,float16,fp8,255,0.023532800376415253
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,2,128,1,float16,float16,511,0.02539680004119873
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,2,128,1,float16,fp8,511,0.0270687997341156
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,2,128,1,float16,float16,1023,0.029340800642967225
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,2,128,1,float16,fp8,1023,0.03144319951534271
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,2,128,1,float16,float16,2047,0.04294080138206482
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,2,128,1,float16,fp8,2047,0.03747360110282898
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,2,128,1,float16,float16,4095,0.058433598279953
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,2,128,1,float16,fp8,4095,0.05632320046424866
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,2,128,1,float16,float16,8191,0.0940559983253479
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,2,128,1,float16,fp8,8191,0.08407679796218873
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,2,128,1,float16,float16,16383,0.1545632004737854
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,2,128,1,float16,fp8,16383,0.13578399419784545
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,2,128,1,float16,float16,32767,0.2774784088134766
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,2,128,1,float16,fp8,32767,0.2342479944229126
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,2,128,1,float16,float16,1,0.01905120015144348
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,2,128,1,float16,fp8,1,0.019948799908161164
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,2,128,1,float16,float16,65535,0.521131181716919
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,2,128,1,float16,float16,3,0.019054399430751802
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,2,128,1,float16,fp8,65535,0.42964801788330076
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,2,128,1,float16,fp8,31,0.02000479996204376
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,2,128,1,float16,fp8,3,0.019980800151824952
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,2,128,1,float16,float16,7,0.019200000166893005
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,2,128,1,float16,fp8,7,0.019921599328517912
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,2,128,1,float16,float16,15,0.019072000682353974
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,2,128,1,float16,fp8,15,0.02003040015697479
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,2,128,1,float16,float16,31,0.01921759992837906
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,2,128,1,float16,fp8,63,0.019950400292873382
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,2,128,1,float16,float16,63,0.01906079947948456
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,2,128,1,float16,float16,127,0.018931199610233308
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,2,128,1,float16,fp8,127,0.019836799800395967
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,2,128,1,float16,float16,255,0.0208639994263649
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,2,128,1,float16,fp8,255,0.021671999990940095
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,2,128,1,float16,float16,511,0.02353599965572357
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,2,128,1,float16,fp8,511,0.025732800364494324
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,2,128,1,float16,float16,1023,0.024985599517822265
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,2,128,1,float16,fp8,1023,0.025995200872421263
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,2,128,1,float16,float16,2047,0.024902400374412537
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,2,128,1,float16,fp8,2047,0.026228800415992737
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,2,128,1,float16,float16,4095,0.02656640112400055
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,2,128,1,float16,fp8,4095,0.026840001344680786
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,2,128,1,float16,float16,8191,0.03174079954624176
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,2,128,1,float16,fp8,8191,0.032425600290298465
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,2,128,1,float16,float16,16383,0.04374879896640778
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,2,128,1,float16,fp8,16383,0.03812159895896912
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,2,128,1,float16,float16,32767,0.060547202825546265
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,2,128,1,float16,fp8,32767,0.05803040266036987
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,2,128,1,float16,float16,65535,0.0923807978630066
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,2,128,1,float16,fp8,65535,0.08504639863967896
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,2,128,1,float16,float16,1,0.026708799600601196
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,2,128,1,float16,fp8,1,0.02800160050392151
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,2,128,1,float16,float16,3,0.026630398631095887
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,2,128,1,float16,fp8,3,0.028248000144958495
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,2,128,1,float16,float16,7,0.026719999313354493
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,2,128,1,float16,fp8,7,0.02818560004234314
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,2,128,1,float16,float16,15,0.026532799005508423
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,2,128,1,float16,fp8,15,0.028255999088287354
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,2,128,1,float16,float16,31,0.026449599862098695
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,2,128,1,float16,fp8,31,0.028457599878311157
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,2,128,1,float16,float16,63,0.026843199133872987
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,2,128,1,float16,fp8,63,0.02847520112991333
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,2,128,1,float16,float16,127,0.027000001072883605
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,2,128,1,float16,fp8,127,0.028407999873161317
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,2,128,1,float16,float16,255,0.030169600248336793
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,2,128,1,float16,fp8,255,0.0323168009519577
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,2,128,1,float16,float16,511,0.043644800782203674
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,2,128,1,float16,fp8,511,0.040406399965286256
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,2,128,1,float16,float16,1023,0.06119359731674194
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,2,128,1,float16,fp8,1023,0.06090239882469177
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,2,128,1,float16,float16,2047,0.0941760003566742
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,2,128,1,float16,fp8,2047,0.08465279936790467
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,2,128,1,float16,float16,4095,0.1545375943183899
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,2,128,1,float16,fp8,4095,0.1368831992149353
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,2,128,1,float16,float16,8191,0.27786080837249755
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,2,128,1,float16,fp8,8191,0.2374351978302002
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,2,128,1,float16,float16,16383,0.5271408081054687
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,2,128,1,float16,float16,1,0.03703039884567261
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,2,128,1,float16,fp8,16383,0.4325232028961182
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,2,128,1,float16,fp8,1,0.04021919965744018
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,2,128,1,float16,float16,32767,0.9920831680297851
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,2,128,1,float16,float16,3,0.03701919913291931
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,2,128,1,float16,fp8,3,0.04015839993953705
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,2,128,1,float16,float16,7,0.03699199855327606
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,2,128,1,float16,fp8,7,0.040078398585319516
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,2,128,1,float16,fp8,32767,0.8212639808654785
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,2,128,1,float16,float16,15,0.03747679889202118
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,2,128,1,float16,fp8,15,0.0403328001499176
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,2,128,1,float16,float16,31,0.03697440028190613
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,2,128,1,float16,fp8,31,0.04023999869823456
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,2,128,1,float16,float16,63,0.03715200126171112
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,2,128,1,float16,float16,511,0.06624959707260132
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,2,128,1,float16,fp8,63,0.04052160084247589
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,2,128,1,float16,float16,127,0.0372655987739563
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,2,128,1,float16,float16,255,0.051800000667572024
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,2,128,1,float16,fp8,127,0.04047040045261383
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,2,128,1,float16,fp8,255,0.04861280024051666
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,2,128,1,float16,fp8,511,0.0691424012184143
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,2,128,1,float16,float16,1023,0.10166879892349243
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,2,128,1,float16,fp8,1023,0.0967631995677948
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,2,128,1,float16,float16,2047,0.16337759494781495
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,2,128,1,float16,float16,1,0.05803520083427429
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,2,128,1,float16,fp8,2047,0.14547359943389893
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,2,128,1,float16,fp8,3,0.06403840184211732
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,2,128,1,float16,float16,4095,0.28544158935546876
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,2,128,1,float16,fp8,4095,0.24726560115814208
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,2,128,1,float16,float16,8191,0.5377439975738525
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,2,128,1,float16,fp8,1,0.06452000141143799
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,2,128,1,float16,float16,3,0.05846560001373291
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,2,128,1,float16,fp8,8191,0.44452319145202634
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,2,128,1,float16,float16,7,0.058422398567199704
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,2,128,1,float16,fp8,7,0.06392800211906433
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,2,128,1,float16,float16,15,0.05872160196304321
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,2,128,1,float16,fp8,15,0.06507359743118286
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,2,128,1,float16,float16,31,0.058606398105621335
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,2,128,1,float16,fp8,31,0.06421599984169006
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,2,128,1,float16,float16,63,0.059089601039886475
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,2,128,1,float16,fp8,63,0.0645632028579712
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,2,128,1,float16,float16,127,0.06621760129928589
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,2,128,1,float16,fp8,127,0.0652127981185913
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,2,128,1,float16,float16,255,0.08406559824943542
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,2,128,1,float16,fp8,255,0.0862496018409729
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,2,128,1,float16,float16,511,0.11253919601440429
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,2,128,1,float16,fp8,511,0.11731200218200684
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,2,128,1,float16,float16,1023,0.1791200041770935
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,2,128,1,float16,fp8,1023,0.1716320037841797
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,2,128,1,float16,float16,2047,0.2989840030670166
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,2,128,1,float16,float16,3,0.01937440037727356
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,2,128,1,float16,fp8,2047,0.26311678886413575
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,2,128,1,float16,float16,1,0.019438399374485014
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,2,128,1,float16,fp8,1,0.020260800421237946
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,2,128,1,float16,float16,4095,0.5508080005645752
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,2,128,1,float16,fp8,4095,0.4640960216522217
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,2,128,1,float16,fp8,3,0.020308800041675568
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,2,128,1,float16,float16,7,0.019729599356651306
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,2,128,1,float16,fp8,7,0.01985439956188202
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,2,128,1,float16,float16,15,0.01929759979248047
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,2,128,1,float16,fp8,15,0.020102399587631225
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,2,128,1,float16,float16,31,0.019225600361824035
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,2,128,1,float16,fp8,31,0.02027360051870346
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,2,128,1,float16,float16,63,0.019351999461650848
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,2,128,1,float16,fp8,63,0.020107200741767882
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,2,128,1,float16,float16,127,0.01902880072593689
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,2,128,1,float16,fp8,127,0.02019200026988983
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,2,128,1,float16,float16,255,0.020983999967575072
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,2,128,1,float16,fp8,255,0.022596800327301027
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,2,128,1,float16,float16,511,0.02402399927377701
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,2,128,1,float16,fp8,511,0.02558239996433258
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,2,128,1,float16,float16,1023,0.02512960135936737
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,2,128,1,float16,fp8,1023,0.026144000887870788
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,2,128,1,float16,float16,2047,0.026635199785232544
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,2,128,1,float16,fp8,2047,0.026756799221038817
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,2,128,1,float16,float16,4095,0.030063998699188233
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,2,128,1,float16,fp8,4095,0.030641600489616394
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,2,128,1,float16,float16,8191,0.0430976003408432
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,2,128,1,float16,fp8,8191,0.038596799969673155
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,2,128,1,float16,float16,16383,0.05910400152206421
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,2,128,1,float16,fp8,16383,0.05713919997215271
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,2,128,1,float16,float16,32767,0.09264960289001464
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,2,128,1,float16,fp8,32767,0.08232640027999878
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,2,128,1,float16,float16,65535,0.15277600288391113
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,2,128,1,float16,fp8,65535,0.13531999588012694
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,2,128,1,float16,float16,1,0.09406239986419677
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,2,128,1,float16,fp8,1,0.10602400302886963
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,2,128,1,float16,float16,3,0.09412639737129211
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,2,128,1,float16,fp8,3,0.10514719486236572
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,2,128,1,float16,float16,7,0.09422879815101623
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,2,128,1,float16,fp8,7,0.1057695984840393
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,2,128,1,float16,float16,15,0.09463359713554383
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,2,128,1,float16,fp8,15,0.10641920566558838
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,2,128,1,float16,float16,31,0.09446079730987549
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,2,128,1,float16,fp8,31,0.10587680339813232
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,2,128,1,float16,float16,63,0.10038880109786988
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,2,128,1,float16,fp8,63,0.10665760040283204
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,2,128,1,float16,float16,127,0.10399680137634278
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,2,128,1,float16,fp8,127,0.11594079732894898
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,2,128,1,float16,float16,255,0.13635679483413696
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,2,128,1,float16,fp8,255,0.14520319700241088
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,2,128,1,float16,fp8,1023,0.30636160373687743
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,2,128,1,float16,float16,511,0.19235999584198
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,2,128,1,float16,fp8,511,0.20711839199066162
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,2,128,1,float16,float16,1023,0.31440958976745603
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,2,128,1,float16,float16,1,0.1616528034210205
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,2,128,1,float16,fp8,1,0.18437119722366332
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,2,128,1,float16,float16,2047,0.5605984210968018
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,2,128,1,float16,float16,3,0.1611616015434265
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,2,128,1,float16,fp8,2047,0.49365921020507814
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,2,128,1,float16,fp8,3,0.1837440013885498
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,2,128,1,float16,float16,7,0.16039999723434448
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,2,128,1,float16,fp8,7,0.18296159505844117
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,2,128,1,float16,float16,15,0.16104639768600465
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,2,128,1,float16,fp8,15,0.18496960401535034
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,2,128,1,float16,float16,31,0.16560319662094117
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,2,128,1,float16,fp8,31,0.18505120277404785
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,2,128,1,float16,float16,63,0.17280960083007812
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,2,128,1,float16,fp8,63,0.19682879447937013
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,2,128,1,float16,float16,127,0.17449120283126832
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,2,128,1,float16,fp8,127,0.19942400455474854
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,2,128,1,float16,fp8,511,0.3824448108673096
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,2,128,1,float16,float16,255,0.23342878818511964
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,2,128,1,float16,fp8,255,0.25821120738983155
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,2,128,1,float16,float16,511,0.3405791997909546
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,2,128,1,float16,float16,1,0.01952800005674362
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,2,128,1,float16,fp8,1,0.020343999564647674
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,2,128,1,float16,float16,1023,0.5958784103393555
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,2,128,1,float16,float16,3,0.019449600577354433
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,2,128,1,float16,fp8,1023,0.5744016170501709
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,2,128,1,float16,fp8,3,0.020398400723934174
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,2,128,1,float16,float16,7,0.01950239986181259
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,2,128,1,float16,fp8,7,0.020497600734233856
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,2,128,1,float16,float16,15,0.019724799692630766
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,2,128,1,float16,fp8,15,0.020470400154590607
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,2,128,1,float16,float16,31,0.019648000597953796
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,2,128,1,float16,fp8,31,0.020420800149440765
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,2,128,1,float16,float16,63,0.019414399564266206
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,2,128,1,float16,fp8,63,0.020528000593185425
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,2,128,1,float16,float16,127,0.019710400700569154
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,2,128,1,float16,float16,1023,0.026497599482536317
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,2,128,1,float16,fp8,127,0.020500800013542174
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,2,128,1,float16,float16,255,0.021110400557518005
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,2,128,1,float16,fp8,255,0.022438399493694305
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,2,128,1,float16,float16,511,0.024697600305080412
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,2,128,1,float16,fp8,511,0.026199999451637267
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,2,128,1,float16,fp8,1023,0.02717599868774414
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,2,128,1,float16,float16,2047,0.029256001114845276
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,2,128,1,float16,fp8,2047,0.030827200412750243
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,2,128,1,float16,float16,4095,0.04167680144309997
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,2,128,1,float16,fp8,4095,0.037745600938796996
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,2,128,1,float16,float16,8191,0.05916799902915955
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,2,128,1,float16,fp8,8191,0.05707520246505737
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,2,128,1,float16,float16,16383,0.09308639764785767
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,2,128,1,float16,fp8,16383,0.08265280127525329
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,2,128,1,float16,float16,32767,0.15414880514144896
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,2,128,1,float16,fp8,32767,0.13461439609527587
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,1,1,128,1,float16,float16,1,0.021055999398231506
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,1,1,128,1,float16,fp8,1,0.021961599588394165
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,2,128,1,float16,float16,65535,0.27355360984802246
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,2,128,1,float16,fp8,65535,0.2331167936325073
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,1,1,128,1,float16,float16,3,0.021300800144672394
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,1,1,128,1,float16,fp8,3,0.021675199270248413
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,1,1,128,1,float16,float16,7,0.02101919949054718
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,1,1,128,1,float16,fp8,7,0.022036799788475038
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,1,1,128,1,float16,float16,15,0.02109919935464859
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,1,1,128,1,float16,fp8,15,0.021859200298786165
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,1,1,128,1,float16,float16,31,0.02086080014705658
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,1,1,128,1,float16,fp8,31,0.02179519981145859
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,1,1,128,1,float16,float16,63,0.02099359929561615
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,1,1,128,1,float16,fp8,63,0.021780799329280853
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,1,1,128,1,float16,float16,511,0.02598559856414795
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,1,1,128,1,float16,float16,127,0.021111999452114106
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,1,1,128,1,float16,fp8,127,0.021687999367713928
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,1,1,128,1,float16,float16,255,0.02274399995803833
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,1,1,128,1,float16,fp8,255,0.02417760044336319
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,1,1,128,1,float16,fp8,511,0.02786880135536194
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,1,1,128,1,float16,float16,1023,0.030137598514556885
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,1,1,128,1,float16,fp8,1023,0.03197120130062103
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,1,1,128,1,float16,float16,2047,0.043068799376487735
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,1,1,128,1,float16,fp8,2047,0.03804160058498383
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,1,1,128,1,float16,float16,4095,0.05914239883422852
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,1,1,128,1,float16,fp8,4095,0.056625598669052125
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,1,1,128,1,float16,float16,8191,0.09463359713554383
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,1,1,128,1,float16,fp8,8191,0.08366400003433228
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,1,1,128,1,float16,float16,16383,0.1604575991630554
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,1,1,128,1,float16,fp8,16383,0.13977760076522827
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,1,1,128,1,float16,float16,32767,0.28023519515991213
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,1,1,128,1,float16,fp8,32767,0.2383568048477173
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,1,1,128,1,float16,float16,65535,0.5156032085418701
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,1,1,128,1,float16,float16,1,0.016271999478340148
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,1,1,128,1,float16,fp8,65535,0.4330448150634766
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,1,1,128,1,float16,fp8,3,0.01725279986858368
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,1,1,128,1,float16,fp8,1,0.01735360026359558
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,1,1,128,1,float16,float16,15,0.016246399283409117
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,1,1,128,1,float16,float16,3,0.0162992000579834
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,1,1,128,1,float16,float16,7,0.01613599956035614
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,1,1,128,1,float16,float16,131071,0.997374439239502
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,1,1,128,1,float16,fp8,7,0.017423999309539796
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,1,1,128,1,float16,fp8,131071,0.8199647903442383
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,1,1,128,1,float16,fp8,15,0.01727840006351471
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,1,1,128,1,float16,float16,31,0.01640319973230362
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,1,1,128,1,float16,float16,255,0.01786399930715561
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,1,1,128,1,float16,fp8,31,0.0173567995429039
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,1,1,128,1,float16,fp8,63,0.017375999689102174
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,1,1,128,1,float16,float16,63,0.016177600622177123
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,1,1,128,1,float16,float16,127,0.017046399414539337
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,1,1,128,1,float16,fp8,127,0.017257599532604216
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,1,1,128,1,float16,fp8,255,0.01902240067720413
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,1,1,128,1,float16,float16,511,0.020894399285316466
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,1,1,128,1,float16,fp8,511,0.023455999791622162
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,1,1,128,1,float16,float16,1023,0.021524800360202788
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,1,1,128,1,float16,fp8,1023,0.022859199345111846
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,1,1,128,1,float16,float16,2047,0.02173759937286377
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,1,1,128,1,float16,fp8,2047,0.023451200127601622
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,1,1,128,1,float16,float16,4095,0.02195200026035309
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,1,1,128,1,float16,fp8,4095,0.023294399678707122
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,1,1,128,1,float16,float16,8191,0.023505599796772005
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,1,1,128,1,float16,fp8,8191,0.025577598810195924
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,1,1,128,1,float16,float16,16383,0.027368000149726866
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,1,1,128,1,float16,fp8,16383,0.02895680069923401
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,1,1,128,1,float16,float16,32767,0.028809601068496705
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,1,1,128,1,float16,fp8,32767,0.029918399453163148
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,1,1,128,1,float16,float16,65535,0.033076798915863036
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,1,1,128,1,float16,fp8,65535,0.03315680027008057
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,1,1,128,1,float16,float16,131071,0.0441760003566742
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,1,1,128,1,float16,fp8,131071,0.04245760142803192
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,1,1,128,1,float16,float16,1,0.01691199988126755
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,1,1,128,1,float16,fp8,1,0.01817599982023239
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,1,1,128,1,float16,float16,31,0.016847999393939973
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,1,1,128,1,float16,float16,3,0.017312000691890716
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,1,1,128,1,float16,fp8,31,0.018134400248527527
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,1,1,128,1,float16,fp8,3,0.01788160055875778
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,1,1,128,1,float16,float16,7,0.017075200378894807
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,1,1,128,1,float16,fp8,7,0.017846399545669557
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,1,1,128,1,float16,float16,15,0.016996799409389494
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,1,1,128,1,float16,fp8,15,0.018031999468803406
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,1,1,128,1,float16,float16,63,0.017334400117397307
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,1,1,128,1,float16,fp8,63,0.01794400066137314
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,1,1,128,1,float16,float16,127,0.017030400037765504
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,1,1,128,1,float16,fp8,127,0.018027199804782866
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,1,1,128,1,float16,float16,255,0.018855999410152435
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,1,1,128,1,float16,fp8,255,0.020046399533748628
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,1,1,128,1,float16,float16,511,0.021768000721931458
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,1,1,128,1,float16,fp8,511,0.024510399997234346
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,1,1,128,1,float16,float16,1023,0.022299200296401978
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,1,1,128,1,float16,fp8,1023,0.0241007998585701
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,1,1,128,1,float16,float16,2047,0.022676800191402436
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,1,1,128,1,float16,fp8,2047,0.024374400079250336
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,1,1,128,1,float16,float16,4095,0.022681599855422972
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,1,1,128,1,float16,fp8,4095,0.024241599440574645
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,1,1,128,1,float16,float16,8191,0.024855999648571013
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,1,1,128,1,float16,fp8,8191,0.02627359926700592
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,1,1,128,1,float16,float16,16383,0.029812800884246825
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,1,1,128,1,float16,fp8,16383,0.029815998673439027
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,1,1,128,1,float16,float16,32767,0.03340800106525421
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,1,1,128,1,float16,fp8,32767,0.03406400084495544
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,1,1,128,1,float16,float16,65535,0.044356799125671385
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,1,1,128,1,float16,fp8,65535,0.039875200390815733
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,1,1,128,1,float16,float16,131071,0.06033759713172913
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,1,1,128,1,float16,fp8,131071,0.05889120101928711
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,1,1,128,1,float16,float16,1,0.019974400103092194
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,1,1,128,1,float16,fp8,1,0.020902399718761445
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,1,1,128,1,float16,float16,3,0.01995680034160614
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,1,1,128,1,float16,fp8,3,0.020795199275016784
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,1,1,128,1,float16,float16,7,0.020102399587631225
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,1,1,128,1,float16,fp8,7,0.02094399929046631
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,1,1,128,1,float16,float16,15,0.020257599651813507
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,1,1,128,1,float16,fp8,15,0.02114560008049011
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,1,1,128,1,float16,float16,31,0.01992959976196289
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,1,1,128,1,float16,fp8,31,0.020790399610996248
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,1,1,128,1,float16,float16,63,0.020020799338817598
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,1,1,128,1,float16,fp8,63,0.020956799387931824
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,1,1,128,1,float16,float16,127,0.019865599274635316
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,1,1,128,1,float16,fp8,127,0.021238400042057036
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,1,1,128,1,float16,float16,255,0.02163040041923523
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,1,1,128,1,float16,fp8,255,0.022732800245285033
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,1,1,128,1,float16,float16,511,0.024825599789619446
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,1,1,128,1,float16,fp8,511,0.026331201195716858
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,1,1,128,1,float16,float16,1023,0.02754719853401184
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,1,1,128,1,float16,fp8,1023,0.02736479938030243
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,1,1,128,1,float16,float16,2047,0.028916800022125246
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,1,1,128,1,float16,fp8,2047,0.031092798709869383
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,1,1,128,1,float16,float16,4095,0.042638400197029115
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,1,1,128,1,float16,fp8,4095,0.03709439933300018
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,1,1,128,1,float16,float16,8191,0.05978239774703979
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,1,1,128,1,float16,fp8,8191,0.05753759741783142
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,1,1,128,1,float16,float16,16383,0.09774399995803833
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,1,1,128,1,float16,fp8,16383,0.08589439988136291
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,1,1,128,1,float16,float16,32767,0.15954400300979615
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,1,1,128,1,float16,fp8,32767,0.1380511999130249
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,1,1,128,1,float16,float16,65535,0.28129119873046876
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,1,1,128,1,float16,fp8,65535,0.23954079151153565
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,1,1,128,1,float16,float16,1,0.018972800672054292
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,1,1,128,1,float16,fp8,1,0.019724799692630766
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,1,1,128,1,float16,float16,131071,0.5137968063354492
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,1,1,128,1,float16,float16,3,0.01900320053100586
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,1,1,128,1,float16,fp8,131071,0.431928014755249
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,1,1,128,1,float16,fp8,3,0.019868800044059755
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,1,1,128,1,float16,float16,7,0.01886080056428909
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,1,1,128,1,float16,fp8,7,0.01987680047750473
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,1,1,128,1,float16,float16,15,0.01910720020532608
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,1,1,128,1,float16,fp8,15,0.019940799474716185
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,1,1,128,1,float16,float16,31,0.01879040002822876
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,1,1,128,1,float16,fp8,31,0.019870400428771973
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,1,1,128,1,float16,float16,63,0.018916800618171692
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,1,1,128,1,float16,fp8,63,0.019908800721168518
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,1,1,128,1,float16,float16,127,0.01926559954881668
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,1,1,128,1,float16,fp8,127,0.019937600195407867
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,1,1,128,1,float16,float16,255,0.020817600190639496
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,1,1,128,1,float16,fp8,255,0.021609599888324737
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,1,1,128,1,float16,float16,511,0.023548799753189086
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,1,1,128,1,float16,fp8,511,0.025518399477005006
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,1,1,128,1,float16,float16,1023,0.024484799802303316
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,1,1,128,1,float16,fp8,1023,0.02566559910774231
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,1,1,128,1,float16,float16,2047,0.024216000735759736
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,1,1,128,1,float16,fp8,2047,0.02571839988231659
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,1,1,128,1,float16,float16,4095,0.024643200635910033
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,1,1,128,1,float16,fp8,4095,0.026180800795555115
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,1,1,128,1,float16,float16,8191,0.028043198585510253
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,1,1,128,1,float16,fp8,8191,0.028248000144958495
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,1,1,128,1,float16,float16,16383,0.0350383996963501
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,1,1,128,1,float16,float16,131071,0.09640799760818482
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,1,1,128,1,float16,fp8,16383,0.03551680147647858
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,1,1,128,1,float16,float16,32767,0.04682720005512238
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,1,1,128,1,float16,fp8,32767,0.042289599776268005
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,1,1,128,1,float16,float16,65535,0.0637615978717804
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,1,1,128,1,float16,fp8,65535,0.061660802364349364
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,1,1,128,1,float16,fp8,131071,0.08840159773826599
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,1,1,128,1,float16,float16,1,0.023160000145435334
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,1,1,128,1,float16,fp8,1,0.023737600445747374
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,1,1,128,1,float16,float16,3,0.02282720059156418
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,1,1,128,1,float16,fp8,3,0.02359039932489395
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,1,1,128,1,float16,float16,7,0.022467200458049775
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,1,1,128,1,float16,fp8,7,0.023736000061035156
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,1,1,128,1,float16,float16,15,0.02284960001707077
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,1,1,128,1,float16,fp8,15,0.023476800322532652
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,1,1,128,1,float16,float16,31,0.022784000635147093
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,1,1,128,1,float16,fp8,31,0.023688000440597535
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,1,1,128,1,float16,float16,63,0.022819200158119203
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,1,1,128,1,float16,fp8,63,0.0235727995634079
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,1,1,128,1,float16,float16,127,0.022673599421977997
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,1,1,128,1,float16,fp8,127,0.02375999987125397
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,1,1,128,1,float16,float16,255,0.024676799774169922
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,1,1,128,1,float16,fp8,255,0.025740799307823182
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,1,1,128,1,float16,float16,511,0.027835199236869813
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,1,1,128,1,float16,fp8,511,0.029860800504684447
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,1,1,128,1,float16,float16,1023,0.043347200751304625
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,1,1,128,1,float16,fp8,1023,0.03750720024108887
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,1,1,128,1,float16,float16,2047,0.058081597089767456
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,1,1,128,1,float16,fp8,2047,0.05585759878158569
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,1,1,128,1,float16,float16,4095,0.09157599806785584
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,1,1,128,1,float16,fp8,4095,0.08040639758110046
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,1,1,128,1,float16,float16,8191,0.15297759771347047
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,1,1,128,1,float16,fp8,8191,0.13333120346069335
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,1,1,128,1,float16,float16,16383,0.2754096031188965
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,1,1,128,1,float16,fp8,16383,0.23299360275268555
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,1,1,128,1,float16,float16,32767,0.5074624061584473
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,1,1,128,1,float16,float16,1,0.029731199145317078
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,1,1,128,1,float16,fp8,32767,0.4242959976196289
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,1,1,128,1,float16,float16,7,0.029334399104118346
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,1,1,128,1,float16,fp8,1,0.03126400113105774
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,1,1,128,1,float16,float16,3,0.02964479923248291
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,1,1,128,1,float16,fp8,3,0.03129599988460541
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,1,1,128,1,float16,float16,65535,0.9938464164733887
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,1,1,128,1,float16,fp8,7,0.03136320114135742
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,1,1,128,1,float16,fp8,65535,0.8153216361999511
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,1,1,128,1,float16,float16,15,0.029569599032402038
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,1,1,128,1,float16,fp8,15,0.03139680027961731
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,1,1,128,1,float16,float16,31,0.02956799864768982
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,1,1,128,1,float16,fp8,31,0.0313616007566452
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,1,1,128,1,float16,float16,63,0.02985920011997223
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,1,1,128,1,float16,fp8,63,0.03236800134181976
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,1,1,128,1,float16,float16,127,0.029662400484085083
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,1,1,128,1,float16,fp8,127,0.03151040077209473
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,1,1,128,1,float16,float16,255,0.033486399054527285
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,1,1,128,1,float16,fp8,255,0.03516319990158081
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,1,1,128,1,float16,float16,2047,0.09815359711647034
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,1,1,128,1,float16,float16,511,0.04652320146560669
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,1,1,128,1,float16,fp8,511,0.04323199987411499
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,1,1,128,1,float16,fp8,4095,0.13996959924697877
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,1,1,128,1,float16,float16,1023,0.06588640213012695
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,1,1,128,1,float16,fp8,1023,0.0644208014011383
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,1,1,128,1,float16,fp8,2047,0.08812959790229798
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,1,1,128,1,float16,float16,4095,0.1577455997467041
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,1,1,128,1,float16,float16,8191,0.2802864074707031
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,1,1,128,1,float16,fp8,1,0.04735519886016846
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,1,1,128,1,float16,fp8,8191,0.24281759262084962
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,1,1,128,1,float16,float16,1,0.04448480010032654
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,1,1,128,1,float16,float16,16383,0.5197984218597412
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,1,1,128,1,float16,float16,3,0.04392479956150055
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,1,1,128,1,float16,fp8,16383,0.43845601081848146
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,1,1,128,1,float16,fp8,3,0.04697760045528412
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,1,1,128,1,float16,float16,7,0.04419200122356415
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,1,1,128,1,float16,fp8,7,0.04767360091209412
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,1,1,128,1,float16,float16,15,0.04459199905395508
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,1,1,128,1,float16,fp8,15,0.04719200134277344
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,1,1,128,1,float16,float16,31,0.04389120042324066
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,1,1,128,1,float16,fp8,31,0.04743199944496155
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,1,1,128,1,float16,float16,63,0.044259199500083925
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,1,1,128,1,float16,fp8,63,0.04779520034790039
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,1,1,128,1,float16,float16,127,0.04563519954681396
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,1,1,128,1,float16,fp8,127,0.0480432003736496
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,1,1,128,1,float16,float16,255,0.0588591992855072
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,1,1,128,1,float16,fp8,255,0.0560912013053894
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,1,1,128,1,float16,float16,511,0.07461280226707459
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,1,1,128,1,float16,fp8,511,0.07659839987754821
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,1,1,128,1,float16,float16,1023,0.11047519445419311
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,1,1,128,1,float16,fp8,1023,0.10494879484176636
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,1,1,128,1,float16,float16,2047,0.17127039432525634
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,1,1,128,1,float16,fp8,2047,0.15305279493331908
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,1,1,128,1,float16,float16,4095,0.2948528051376343
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,1,1,128,1,float16,fp8,4095,0.25604000091552737
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,1,1,128,1,float16,float16,1,0.01897599995136261
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,1,1,128,1,float16,float16,8191,0.5254271984100342
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,1,1,128,1,float16,fp8,8191,0.45133438110351565
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,1,1,128,1,float16,fp8,1,0.020206399261951447
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,1,1,128,1,float16,fp8,15,0.020371200144290925
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,1,1,128,1,float16,float16,3,0.019167999923229217
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,1,1,128,1,float16,fp8,3,0.02011840045452118
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,1,1,128,1,float16,float16,7,0.01919520050287247
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,1,1,128,1,float16,fp8,7,0.020153599977493285
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,1,1,128,1,float16,float16,15,0.019288000464439393
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,1,1,128,1,float16,float16,31,0.019179199635982514
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,1,1,128,1,float16,fp8,31,0.020033599436283113
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,1,1,128,1,float16,float16,63,0.019424000382423402
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,1,1,128,1,float16,fp8,63,0.020156799256801604
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,1,1,128,1,float16,float16,127,0.019249600172042847
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,1,1,128,1,float16,fp8,127,0.019791999459266664
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,1,1,128,1,float16,float16,255,0.02083519995212555
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,1,1,128,1,float16,fp8,255,0.022011199593544008
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,1,1,128,1,float16,float16,511,0.023870399594306944
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,1,1,128,1,float16,fp8,511,0.02568320035934448
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,1,1,128,1,float16,float16,1023,0.024860799312591553
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,1,1,128,1,float16,fp8,1023,0.02640480101108551
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,1,1,128,1,float16,fp8,8191,0.03251680135726929
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,1,1,128,1,float16,float16,2047,0.025062400102615356
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,1,1,128,1,float16,fp8,2047,0.025947201251983642
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,1,1,128,1,float16,float16,4095,0.026683199405670165
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,1,1,128,1,float16,fp8,4095,0.026923200488090514
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,1,1,128,1,float16,float16,65535,0.0984607994556427
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,1,1,128,1,float16,float16,8191,0.03200959861278534
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,1,1,128,1,float16,float16,16383,0.04714879989624023
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,1,1,128,1,float16,fp8,16383,0.041833600401878356
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,1,1,128,1,float16,float16,32767,0.06330879926681518
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,1,1,128,1,float16,fp8,32767,0.061735999584197995
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,1,1,128,1,float16,fp8,65535,0.08908799886703492
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,1,1,128,1,float16,float16,131071,0.15853279829025269
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,1,1,128,1,float16,fp8,131071,0.14258079528808593
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,1,1,128,1,float16,float16,1,0.06541439890861511
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,1,1,128,1,float16,fp8,1,0.07103520035743713
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,1,1,128,1,float16,float16,3,0.06546400189399719
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,1,1,128,1,float16,fp8,3,0.07123200297355652
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,1,1,128,1,float16,float16,7,0.06511359810829162
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,1,1,128,1,float16,fp8,7,0.07137759923934936
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,1,1,128,1,float16,float16,15,0.06540480256080627
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,1,1,128,1,float16,fp8,15,0.07119359970092773
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,1,1,128,1,float16,float16,31,0.06556479930877686
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,1,1,128,1,float16,fp8,31,0.07147039771080017
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,1,1,128,1,float16,float16,63,0.06564159989356995
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,1,1,128,1,float16,fp8,63,0.07161440253257752
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,1,1,128,1,float16,float16,127,0.07344959974288941
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,1,1,128,1,float16,fp8,127,0.07258719801902772
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,1,1,128,1,float16,float16,255,0.08980799913406372
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,1,1,128,1,float16,fp8,255,0.09448800086975098
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,1,1,128,1,float16,float16,511,0.12076480388641357
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,1,1,128,1,float16,fp8,511,0.12598559856414795
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,1,1,128,1,float16,float16,1023,0.18443360328674316
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,1,1,128,1,float16,fp8,1023,0.1771232008934021
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,1,1,128,1,float16,float16,2047,0.3035248041152954
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,1,1,128,1,float16,fp8,2047,0.2713776111602783
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,1,1,128,1,float16,float16,1,0.1052623987197876
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,1,1,128,1,float16,float16,4095,0.5382383823394775
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,1,1,128,1,float16,fp8,1,0.11731840372085571
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,1,1,128,1,float16,fp8,4095,0.4688687801361084
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,1,1,128,1,float16,float16,3,0.10594240427017212
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,1,1,128,1,float16,fp8,3,0.11670559644699097
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,1,1,128,1,float16,fp8,7,0.11717920303344727
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,1,1,128,1,float16,float16,7,0.10595359802246093
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,1,1,128,1,float16,float16,15,0.10553439855575561
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,1,1,128,1,float16,fp8,15,0.11748319864273071
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,1,1,128,1,float16,float16,31,0.10526880025863647
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,1,1,128,1,float16,fp8,31,0.11802239418029785
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,1,1,128,1,float16,float16,63,0.11335999965667724
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,1,1,128,1,float16,fp8,63,0.11896159648895263
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,1,1,128,1,float16,float16,127,0.11700160503387451
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,1,1,128,1,float16,fp8,127,0.12752799987792968
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,1,1,128,1,float16,float16,255,0.14707039594650267
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,1,1,128,1,float16,fp8,255,0.15717920064926147
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,1,1,128,1,float16,float16,1023,0.32879838943481443
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,1,1,128,1,float16,float16,511,0.20562078952789306
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,1,1,128,1,float16,fp8,511,0.2181551933288574
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,1,1,128,1,float16,float16,1,0.01937599927186966
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,1,1,128,1,float16,fp8,1023,0.3212768077850342
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,1,1,128,1,float16,fp8,7,0.020136000216007234
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,1,1,128,1,float16,float16,3,0.019057600200176238
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,1,1,128,1,float16,fp8,1,0.020555199682712556
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,1,1,128,1,float16,float16,2047,0.5609360218048096
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,1,1,128,1,float16,fp8,3,0.020500800013542174
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,1,1,128,1,float16,float16,7,0.019494399428367615
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,1,1,128,1,float16,fp8,2047,0.5074656009674072
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,1,1,128,1,float16,fp8,15,0.02003040015697479
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,1,1,128,1,float16,float16,15,0.019721600413322448
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,1,1,128,1,float16,float16,31,0.01934880018234253
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,1,1,128,1,float16,fp8,31,0.0202224001288414
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,1,1,128,1,float16,float16,63,0.01966720074415207
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,1,1,128,1,float16,fp8,63,0.020080000162124634
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,1,1,128,1,float16,float16,127,0.019651199877262115
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,1,1,128,1,float16,fp8,127,0.02028000056743622
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,1,1,128,1,float16,float16,255,0.0213359996676445
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,1,1,128,1,float16,fp8,255,0.022649599611759184
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,1,1,128,1,float16,float16,511,0.02399519979953766
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,1,1,128,1,float16,fp8,511,0.02565760016441345
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,1,1,128,1,float16,float16,1023,0.025177600979804992
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,1,1,128,1,float16,fp8,1023,0.02635039985179901
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,1,1,128,1,float16,float16,2047,0.02683840095996857
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,1,1,128,1,float16,fp8,2047,0.026875200867652892
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,1,1,128,1,float16,float16,4095,0.030302399396896364
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,1,1,128,1,float16,fp8,4095,0.030691200494766237
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,1,1,128,1,float16,float16,8191,0.04311679899692535
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,1,1,128,1,float16,fp8,8191,0.03871200084686279
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,1,1,128,1,float16,float16,16383,0.06246560215950012
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,1,1,128,1,float16,fp8,16383,0.060059201717376706
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,1,1,128,1,float16,float16,32767,0.09662879705429077
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,1,1,128,1,float16,fp8,131071,0.23733758926391602
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,1,1,128,1,float16,fp8,32767,0.08607199788093567
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,1,1,128,1,float16,float16,65535,0.15867680311203003
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,1,1,128,1,float16,fp8,65535,0.13786720037460326
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,1,1,128,1,float16,float16,131071,0.2792479991912842
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,1,128,1,float16,float16,1,0.02173600047826767
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,1,128,1,float16,fp8,1,0.02213920056819916
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,1,128,1,float16,float16,3,0.02176000028848648
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,1,128,1,float16,fp8,3,0.022364799678325654
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,1,128,1,float16,float16,7,0.02138720005750656
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,1,128,1,float16,fp8,7,0.0222448006272316
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,1,128,1,float16,float16,15,0.021715199947357176
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,1,128,1,float16,fp8,15,0.02248319983482361
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,1,128,1,float16,float16,31,0.02157920002937317
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,1,128,1,float16,fp8,31,0.02211039960384369
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,1,128,1,float16,float16,63,0.021984000504016877
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,1,128,1,float16,fp8,63,0.022142399847507478
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,1,128,1,float16,float16,127,0.02163680046796799
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,1,128,1,float16,fp8,127,0.02221920043230057
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,1,128,1,float16,float16,255,0.02385759949684143
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,1,128,1,float16,fp8,255,0.024156799912452696
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,1,128,1,float16,float16,511,0.02691679894924164
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,1,128,1,float16,fp8,511,0.027668800950050355
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,1,128,1,float16,float16,1023,0.03804000020027161
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,1,128,1,float16,fp8,1023,0.03482879996299744
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,1,128,1,float16,float16,2047,0.04934720098972321
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,1,128,1,float16,fp8,2047,0.04692640006542206
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,2,128,1,float16,float16,1,0.021521599590778352
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,2,128,1,float16,fp8,1,0.022169600427150726
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,2,128,1,float16,fp8,3,0.022251200675964356
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,2,128,1,float16,float16,3,0.02134400010108948
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,2,128,1,float16,float16,7,0.02117599993944168
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,2,128,1,float16,fp8,7,0.02245279997587204
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,2,128,1,float16,float16,15,0.021411199867725373
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,2,128,1,float16,fp8,15,0.02226720005273819
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,2,128,1,float16,float16,31,0.021665599942207337
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,2,128,1,float16,fp8,31,0.02218720018863678
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,2,128,1,float16,float16,63,0.021568000316619873
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,2,128,1,float16,fp8,63,0.022593599557876588
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,2,128,1,float16,float16,127,0.0216511994600296
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,2,128,1,float16,fp8,127,0.02250239998102188
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,2,128,1,float16,float16,255,0.023502400517463683
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,2,128,1,float16,fp8,255,0.024376000463962554
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,2,128,1,float16,fp8,2047,0.055612802505493164
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,2,128,1,float16,float16,511,0.0268640011548996
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,2,128,1,float16,fp8,511,0.02797119915485382
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,2,128,1,float16,float16,1023,0.04347839951515198
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,2,128,1,float16,fp8,1023,0.03780319988727569
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,2,128,1,float16,float16,2047,0.058955198526382445
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,4,128,1,float16,float16,1,0.025204798579216002
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,4,128,1,float16,fp8,1,0.026926401257514953
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,4,128,1,float16,float16,3,0.025336000323295593
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,4,128,1,float16,fp8,3,0.026862400770187377
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,4,128,1,float16,float16,7,0.025319999456405638
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,4,128,1,float16,fp8,7,0.026795199513435362
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,4,128,1,float16,float16,63,0.025393599271774293
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,4,128,1,float16,float16,15,0.02513439953327179
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,4,128,1,float16,fp8,15,0.02669599950313568
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,4,128,1,float16,float16,31,0.02540160119533539
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,4,128,1,float16,fp8,31,0.02715199887752533
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,4,128,1,float16,fp8,63,0.026923200488090514
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,4,128,1,float16,float16,127,0.02561280131340027
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,4,128,1,float16,fp8,127,0.026814401149749756
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,4,128,1,float16,float16,255,0.02948479950428009
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,4,128,1,float16,fp8,255,0.03113600015640259
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,4,128,1,float16,float16,2047,0.09500960111618043
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,4,128,1,float16,float16,511,0.04314239919185638
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,4,128,1,float16,fp8,511,0.03929600119590759
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,4,128,1,float16,float16,1023,0.0626031994819641
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,4,128,1,float16,fp8,1023,0.06052960157394409
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,4,128,1,float16,fp8,2047,0.08414400219917298
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,8,128,1,float16,float16,1,0.03282879889011383
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,8,128,1,float16,fp8,1,0.036111998558044436
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,8,128,1,float16,float16,3,0.033081600069999696
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,8,128,1,float16,fp8,3,0.03623040020465851
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,8,128,1,float16,float16,7,0.03289119899272919
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,8,128,1,float16,fp8,7,0.035872000455856326
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,8,128,1,float16,float16,15,0.03254239857196808
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,8,128,1,float16,fp8,15,0.03578239977359772
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,8,128,1,float16,fp8,31,0.03607519865036011
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,8,128,1,float16,float16,31,0.03290559947490692
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,8,128,1,float16,float16,63,0.03312320113182068
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,8,128,1,float16,fp8,63,0.03638400137424469
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,8,128,1,float16,float16,127,0.03378880023956299
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,8,128,1,float16,fp8,127,0.03622879981994629
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,8,128,1,float16,float16,255,0.049348801374435425
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,8,128,1,float16,fp8,255,0.04567359983921051
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,8,128,1,float16,float16,511,0.06297439932823182
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,8,128,1,float16,fp8,511,0.06527360081672669
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,8,128,1,float16,float16,1023,0.09902880191802979
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,8,128,1,float16,fp8,1023,0.0938256025314331
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,8,128,1,float16,float16,2047,0.16085599660873412
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,128,8,128,1,float16,fp8,2047,0.14221279621124266
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,1,128,1,float16,float16,1,0.01672160029411316
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,1,128,1,float16,fp8,1,0.01763039976358414
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,1,128,1,float16,float16,3,0.016809600591659545
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,1,128,1,float16,fp8,3,0.017601600289344786
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,1,128,1,float16,float16,7,0.016764800250530242
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,1,128,1,float16,fp8,7,0.017611199617385866
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,1,128,1,float16,float16,15,0.01695840060710907
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,1,128,1,float16,fp8,15,0.017654399573802947
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,1,128,1,float16,float16,31,0.01656640022993088
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,1,128,1,float16,fp8,31,0.017627200484275816
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,1,128,1,float16,float16,63,0.016729600727558136
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,1,128,1,float16,fp8,63,0.017606399953365326
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,1,128,1,float16,float16,127,0.01664000004529953
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,1,128,1,float16,fp8,127,0.017448000609874725
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,1,128,1,float16,float16,255,0.018302400410175324
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,1,128,1,float16,fp8,255,0.01958079934120178
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,1,128,1,float16,float16,511,0.021240000426769257
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,1,128,1,float16,fp8,511,0.02348800003528595
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,1,128,1,float16,float16,1023,0.021547199785709382
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,1,128,1,float16,fp8,1023,0.022937600314617158
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,1,128,1,float16,float16,2047,0.021991999447345735
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,1,128,1,float16,fp8,2047,0.023563200235366823
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,2,128,1,float16,float16,1,0.017448000609874725
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,2,128,1,float16,fp8,1,0.01823039948940277
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,2,128,1,float16,float16,3,0.017108799517154695
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,2,128,1,float16,fp8,3,0.018131199479103088
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,2,128,1,float16,float16,7,0.01720159947872162
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,2,128,1,float16,fp8,7,0.018063999712467194
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,2,128,1,float16,float16,15,0.01687999963760376
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,2,128,1,float16,fp8,15,0.018004800379276275
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,2,128,1,float16,float16,31,0.017263999581336974
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,2,128,1,float16,fp8,31,0.018219199776649476
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,2,128,1,float16,float16,63,0.01703200042247772
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,2,128,1,float16,fp8,63,0.018108800053596497
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,2,128,1,float16,float16,127,0.017179200053215028
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,2,128,1,float16,fp8,127,0.01794400066137314
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,2,128,1,float16,float16,255,0.018905599415302277
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,2,128,1,float16,fp8,255,0.02001439929008484
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,2,128,1,float16,float16,511,0.02173600047826767
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,2,128,1,float16,fp8,511,0.024051199853420257
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,2,128,1,float16,float16,1023,0.021835200488567352
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,2,128,1,float16,fp8,1023,0.02317280024290085
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,2,128,1,float16,float16,2047,0.02202560007572174
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,2,128,1,float16,fp8,2047,0.023417599499225616
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,4,128,1,float16,float16,1,0.01855680048465729
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,4,128,1,float16,fp8,1,0.019832000136375427
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,4,128,1,float16,float16,3,0.018995200097560883
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,4,128,1,float16,fp8,3,0.01977279931306839
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,4,128,1,float16,float16,7,0.019072000682353974
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,4,128,1,float16,fp8,7,0.0200095996260643
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,4,128,1,float16,float16,15,0.019073599576950075
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,4,128,1,float16,fp8,15,0.01977439969778061
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,4,128,1,float16,float16,31,0.018699200451374055
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,4,128,1,float16,fp8,31,0.019631999731063842
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,4,128,1,float16,float16,63,0.019092799723148347
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,4,128,1,float16,fp8,63,0.01972000002861023
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,4,128,1,float16,float16,127,0.018751999735832213
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,4,128,1,float16,fp8,127,0.019809600710868836
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,4,128,1,float16,float16,255,0.020364800095558168
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,4,128,1,float16,fp8,255,0.021835200488567352
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,4,128,1,float16,float16,511,0.02362399995326996
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,4,128,1,float16,fp8,511,0.02528800070285797
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,4,128,1,float16,float16,1023,0.0234592005610466
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,4,128,1,float16,fp8,1023,0.024566400051116943
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,4,128,1,float16,float16,2047,0.023732799291610717
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,4,128,1,float16,fp8,2047,0.024699200689792634
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,8,128,1,float16,float16,1,0.018727999925613404
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,8,128,1,float16,fp8,1,0.019971199333667755
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,8,128,1,float16,float16,3,0.0191103994846344
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,8,128,1,float16,fp8,3,0.019871999323368073
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,8,128,1,float16,float16,7,0.01900479942560196
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,8,128,1,float16,fp8,7,0.01992799937725067
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,8,128,1,float16,float16,15,0.019100800156593323
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,8,128,1,float16,fp8,15,0.01956319957971573
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,8,128,1,float16,float16,31,0.018988800048828126
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,8,128,1,float16,fp8,31,0.01976799964904785
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,8,128,1,float16,float16,63,0.018982400000095368
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,8,128,1,float16,fp8,63,0.019815999269485473
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,8,128,1,float16,float16,127,0.018910400569438934
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,8,128,1,float16,fp8,127,0.019732800126075745
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,8,128,1,float16,float16,255,0.020491200685501098
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,8,128,1,float16,fp8,255,0.022099199891090392
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,8,128,1,float16,float16,511,0.02346560060977936
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,8,128,1,float16,fp8,511,0.025411200523376466
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,8,128,1,float16,float16,1023,0.023294399678707122
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,8,128,1,float16,fp8,1023,0.02470400035381317
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,8,128,1,float16,float16,2047,0.024134400486946105
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,128,8,128,1,float16,fp8,2047,0.024956800043582916
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,1,128,1,float16,float16,1,0.017564800381660462
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,1,128,1,float16,fp8,1,0.01830720007419586
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,1,128,1,float16,float16,3,0.017521600425243377
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,1,128,1,float16,fp8,3,0.018372799456119537
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,1,128,1,float16,float16,7,0.01740639954805374
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,1,128,1,float16,fp8,7,0.018273599445819855
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,1,128,1,float16,float16,15,0.017416000366210938
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,1,128,1,float16,fp8,15,0.018427200615406036
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,1,128,1,float16,float16,31,0.017528000473976135
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,1,128,1,float16,fp8,31,0.018265600502490997
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,1,128,1,float16,float16,63,0.017664000391960144
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,1,128,1,float16,fp8,63,0.01842560023069382
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,1,128,1,float16,float16,127,0.017392000555992125
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,1,128,1,float16,fp8,127,0.018478399515151976
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,1,128,1,float16,float16,255,0.019099199771881105
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,1,128,1,float16,fp8,255,0.02038400024175644
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,1,128,1,float16,float16,511,0.02222079932689667
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,1,128,1,float16,fp8,511,0.024403199553489685
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,1,128,1,float16,float16,1023,0.0225040003657341
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,1,128,1,float16,fp8,1023,0.023817600309848787
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,1,128,1,float16,float16,2047,0.022951999306678773
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,1,128,1,float16,fp8,2047,0.024347199499607085
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,2,128,1,float16,float16,1,0.019054399430751802
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,2,128,1,float16,fp8,1,0.01979680061340332
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,2,128,1,float16,float16,3,0.019142399728298187
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,2,128,1,float16,fp8,3,0.019817599654197694
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,2,128,1,float16,float16,7,0.019011199474334717
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,2,128,1,float16,fp8,7,0.019700799882411957
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,2,128,1,float16,float16,15,0.018958400189876556
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,2,128,1,float16,fp8,15,0.01971839964389801
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,2,128,1,float16,float16,31,0.01902880072593689
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,2,128,1,float16,fp8,31,0.01984799951314926
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,2,128,1,float16,float16,63,0.0189423993229866
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,2,128,1,float16,fp8,63,0.019976000487804412
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,2,128,1,float16,float16,127,0.019355200231075287
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,2,128,1,float16,fp8,127,0.019806399941444397
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,2,128,1,float16,float16,255,0.02067999988794327
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,2,128,1,float16,fp8,255,0.021910400688648225
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,2,128,1,float16,float16,511,0.023737600445747374
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,2,128,1,float16,fp8,511,0.025920000672340394
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,2,128,1,float16,float16,1023,0.023931199312210084
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,2,128,1,float16,fp8,1023,0.02499680072069168
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,2,128,1,float16,float16,2047,0.024111999571323393
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,2,128,1,float16,fp8,2047,0.02540319859981537
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,4,128,1,float16,fp8,1,0.019896000623703003
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,4,128,1,float16,float16,1,0.01906079947948456
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,4,128,1,float16,float16,3,0.01906079947948456
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,4,128,1,float16,fp8,3,0.01992799937725067
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,4,128,1,float16,float16,7,0.018991999328136444
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,4,128,1,float16,fp8,7,0.020033599436283113
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,4,128,1,float16,float16,15,0.019219200313091277
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,4,128,1,float16,fp8,15,0.019934399425983428
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,4,128,1,float16,float16,31,0.019105599820613862
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,4,128,1,float16,fp8,31,0.019732800126075745
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,4,128,1,float16,float16,63,0.0189968004822731
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,4,128,1,float16,fp8,63,0.019912000000476836
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,4,128,1,float16,float16,127,0.019334399700164796
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,4,128,1,float16,fp8,127,0.020150400698184967
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,4,128,1,float16,fp8,1023,0.024804799258708952
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,4,128,1,float16,float16,255,0.02084160000085831
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,4,128,1,float16,fp8,255,0.021904000639915468
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,4,128,1,float16,float16,511,0.023647999763488768
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,4,128,1,float16,fp8,511,0.0254287987947464
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,4,128,1,float16,float16,1023,0.023695999383926393
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,4,128,1,float16,float16,2047,0.023955200612545014
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,4,128,1,float16,fp8,2047,0.02563999891281128
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,8,128,1,float16,float16,1,0.01916159987449646
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,8,128,1,float16,fp8,1,0.020001600682735442
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,8,128,1,float16,float16,3,0.01929599940776825
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,8,128,1,float16,fp8,3,0.019862399995326997
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,8,128,1,float16,float16,7,0.01902559995651245
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,8,128,1,float16,fp8,7,0.0197952002286911
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,8,128,1,float16,float16,15,0.01934880018234253
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,8,128,1,float16,fp8,15,0.019966399669647215
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,8,128,1,float16,float16,31,0.01932159960269928
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,8,128,1,float16,fp8,31,0.02007199972867966
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,8,128,1,float16,float16,63,0.01915840059518814
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,8,128,1,float16,fp8,63,0.019912000000476836
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,8,128,1,float16,float16,127,0.019556799530982973
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,8,128,1,float16,float16,1023,0.023904000222682954
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,8,128,1,float16,fp8,127,0.020311999320983886
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,8,128,1,float16,float16,255,0.0208064004778862
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,8,128,1,float16,fp8,255,0.021860800683498383
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,8,128,1,float16,float16,511,0.023937599360942842
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,8,128,1,float16,fp8,511,0.02545439898967743
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,1,128,1,float16,float16,3,0.020428800582885744
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,8,128,1,float16,fp8,1023,0.025143998861312866
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,8,128,1,float16,float16,2047,0.025887998938560485
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,128,8,128,1,float16,fp8,2047,0.026054400205612182
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,1,128,1,float16,float16,1,0.02062560021877289
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,1,128,1,float16,fp8,1,0.021185599267482758
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,1,128,1,float16,fp8,3,0.02128159999847412
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,1,128,1,float16,float16,7,0.020446400344371795
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,1,128,1,float16,fp8,7,0.021241599321365358
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,1,128,1,float16,float16,15,0.020871999859809875
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,1,128,1,float16,fp8,15,0.021087999641895293
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,1,128,1,float16,float16,31,0.02051199972629547
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,1,128,1,float16,fp8,31,0.02139039933681488
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,1,128,1,float16,float16,63,0.020427200198173522
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,1,128,1,float16,fp8,63,0.021164800226688384
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,1,128,1,float16,float16,127,0.020908799767494202
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,1,128,1,float16,fp8,127,0.02131199985742569
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,1,128,1,float16,float16,255,0.022515200078487396
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,1,128,1,float16,fp8,255,0.023212799429893495
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,1,128,1,float16,float16,511,0.025465598702430724
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,1,128,1,float16,fp8,511,0.0270224004983902
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,1,128,1,float16,float16,1023,0.028411200642585753
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,1,128,1,float16,fp8,1023,0.029164800047874452
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,1,128,1,float16,float16,2047,0.03309760093688965
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,1,128,1,float16,fp8,2047,0.03296320140361786
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,2,128,1,float16,float16,1,0.020751999318599702
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,2,128,1,float16,fp8,1,0.021473599970340727
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,2,128,1,float16,float16,3,0.020158399641513825
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,2,128,1,float16,fp8,3,0.021191999316215515
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,2,128,1,float16,float16,7,0.02059520035982132
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,2,128,1,float16,fp8,7,0.021118399500846863
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,2,128,1,float16,float16,15,0.02051360011100769
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,2,128,1,float16,fp8,15,0.02144960016012192
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,2,128,1,float16,float16,31,0.020451200008392335
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,2,128,1,float16,fp8,31,0.021083199977874757
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,2,128,1,float16,float16,63,0.02030719965696335
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,2,128,1,float16,fp8,63,0.021456000208854676
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,2,128,1,float16,float16,127,0.02083040028810501
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,2,128,1,float16,fp8,127,0.021096000075340272
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,2,128,1,float16,float16,255,0.02218240052461624
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,2,128,1,float16,fp8,255,0.02309119999408722
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,2,128,1,float16,float16,511,0.02571839988231659
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,2,128,1,float16,fp8,511,0.027236801385879517
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,2,128,1,float16,float16,1023,0.02988480031490326
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,2,128,1,float16,fp8,1023,0.031220799684524535
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,2,128,1,float16,float16,2047,0.0444927990436554
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,2,128,1,float16,fp8,2047,0.04000959992408752
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,4,128,1,float16,float16,1,0.02093279957771301
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,4,128,1,float16,fp8,1,0.021675199270248413
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,4,128,1,float16,float16,3,0.02088959962129593
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,4,128,1,float16,fp8,3,0.02152640074491501
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,4,128,1,float16,float16,7,0.020472000539302825
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,4,128,1,float16,fp8,7,0.02160159945487976
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,4,128,1,float16,float16,15,0.02075680047273636
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,4,128,1,float16,fp8,15,0.021687999367713928
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,4,128,1,float16,float16,31,0.020683200657367708
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,4,128,1,float16,fp8,31,0.02154559940099716
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,4,128,1,float16,float16,63,0.020948800444602966
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,4,128,1,float16,fp8,63,0.021687999367713928
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,4,128,1,float16,float16,127,0.020905600488185884
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,4,128,1,float16,fp8,127,0.021835200488567352
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,4,128,1,float16,float16,255,0.022892799973487855
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,4,128,1,float16,fp8,255,0.023489600419998168
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,4,128,1,float16,float16,511,0.02612000107765198
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,4,128,1,float16,fp8,511,0.027590399980545043
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,4,128,1,float16,float16,1023,0.04098879992961883
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,4,128,1,float16,fp8,1023,0.03531520068645477
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,4,128,1,float16,float16,2047,0.056548798084259035
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,4,128,1,float16,fp8,2047,0.05475360155105591
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,8,128,1,float16,float16,1,0.024806399643421174
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,8,128,1,float16,fp8,1,0.026230400800704955
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,8,128,1,float16,float16,3,0.024694399535655977
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,8,128,1,float16,fp8,3,0.02621760070323944
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,8,128,1,float16,float16,7,0.024700799584388734
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,8,128,1,float16,fp8,7,0.02598559856414795
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,8,128,1,float16,float16,15,0.02452320009469986
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,8,128,1,float16,fp8,15,0.026204800605773924
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,8,128,1,float16,float16,31,0.02476799935102463
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,8,128,1,float16,fp8,31,0.026025599241256712
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,8,128,1,float16,float16,63,0.024758400022983552
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,8,128,1,float16,fp8,63,0.026156800985336303
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,8,128,1,float16,float16,127,0.024958400428295134
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,8,128,1,float16,fp8,127,0.026454401016235352
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,8,128,1,float16,float16,255,0.0286624014377594
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,8,128,1,float16,fp8,255,0.03027839958667755
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,8,128,1,float16,float16,511,0.04134719967842102
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,8,128,1,float16,fp8,511,0.03838399946689606
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,8,128,1,float16,float16,1023,0.059222400188446045
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,8,128,1,float16,fp8,1023,0.05969759821891785
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,8,128,1,float16,float16,2047,0.09274079799652099
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,128,8,128,1,float16,fp8,2047,0.08319680094718933
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,1,128,1,float16,float16,1,0.0192671999335289
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,1,128,1,float16,fp8,1,0.01982080042362213
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,1,128,1,float16,float16,3,0.019190399348735808
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,1,128,1,float16,fp8,3,0.020310400426387785
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,1,128,1,float16,float16,7,0.01916320025920868
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,1,128,1,float16,fp8,7,0.020177599787712098
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,1,128,1,float16,float16,15,0.01942880004644394
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,1,128,1,float16,fp8,15,0.020227199792861937
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,1,128,1,float16,float16,31,0.01929599940776825
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,1,128,1,float16,fp8,31,0.020095999538898467
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,1,128,1,float16,float16,63,0.019673599302768706
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,1,128,1,float16,fp8,63,0.019988800585269927
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,1,128,1,float16,float16,127,0.01932799965143204
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,1,128,1,float16,fp8,127,0.020134399831295013
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,1,128,1,float16,float16,255,0.020870399475097657
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,1,128,1,float16,fp8,255,0.02218399941921234
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,1,128,1,float16,float16,511,0.02369280010461807
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,1,128,1,float16,fp8,511,0.025740799307823182
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,1,128,1,float16,float16,1023,0.024398399889469145
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,1,128,1,float16,fp8,1023,0.025779199600219727
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,1,128,1,float16,float16,2047,0.024990400671958922
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,1,128,1,float16,fp8,2047,0.025915199518203737
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,2,128,1,float16,float16,1,0.019312000274658202
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,2,128,1,float16,fp8,1,0.019857600331306458
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,2,128,1,float16,float16,3,0.019115200638771056
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,2,128,1,float16,fp8,3,0.02001439929008484
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,2,128,1,float16,float16,7,0.018916800618171692
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,2,128,1,float16,fp8,7,0.020006400346755982
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,2,128,1,float16,float16,15,0.019363200664520262
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,2,128,1,float16,fp8,15,0.01977919936180115
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,2,128,1,float16,float16,31,0.0191648006439209
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,2,128,1,float16,fp8,31,0.01998720020055771
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,2,128,1,float16,float16,63,0.0191648006439209
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,2,128,1,float16,fp8,63,0.019974400103092194
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,2,128,1,float16,float16,127,0.01926079988479614
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,2,128,1,float16,fp8,127,0.01993280053138733
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,2,128,1,float16,float16,255,0.020948800444602966
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,2,128,1,float16,fp8,255,0.02173440009355545
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,2,128,1,float16,float16,511,0.02374400049448013
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,2,128,1,float16,fp8,511,0.025563201308250426
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,2,128,1,float16,float16,1023,0.024164800345897675
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,2,128,1,float16,fp8,1023,0.02526560127735138
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,2,128,1,float16,float16,2047,0.024766400456428528
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,2,128,1,float16,fp8,2047,0.025464001297950744
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,4,128,1,float16,float16,1,0.01930239945650101
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,4,128,1,float16,fp8,1,0.02008160054683685
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,4,128,1,float16,float16,3,0.01908639967441559
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,4,128,1,float16,fp8,3,0.02006720006465912
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,4,128,1,float16,float16,7,0.019096000492572783
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,4,128,1,float16,fp8,7,0.02006080001592636
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,4,128,1,float16,float16,15,0.01930239945650101
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,4,128,1,float16,fp8,15,0.02009759992361069
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,4,128,1,float16,float16,31,0.019305600225925444
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,4,128,1,float16,fp8,31,0.01991039961576462
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,4,128,1,float16,float16,63,0.01928640007972717
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,4,128,1,float16,fp8,63,0.020102399587631225
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,4,128,1,float16,float16,127,0.019236800074577332
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,4,128,1,float16,fp8,127,0.01998720020055771
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,4,128,1,float16,float16,255,0.0208639994263649
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,4,128,1,float16,fp8,255,0.021911999583244322
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,4,128,1,float16,float16,511,0.024140800535678863
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,4,128,1,float16,fp8,511,0.02557600140571594
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,4,128,1,float16,float16,1023,0.0241007998585701
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,4,128,1,float16,fp8,1023,0.025339201092720032
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,4,128,1,float16,float16,2047,0.02579520046710968
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,4,128,1,float16,fp8,2047,0.025804799795150758
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,8,128,1,float16,float16,1,0.019436800479888917
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,8,128,1,float16,fp8,1,0.020275199413299562
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,8,128,1,float16,float16,3,0.019543999433517457
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,8,128,1,float16,fp8,3,0.020044800639152528
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,8,128,1,float16,float16,7,0.019398400187492372
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,8,128,1,float16,fp8,7,0.02011840045452118
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,8,128,1,float16,float16,15,0.019380800426006317
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,8,128,1,float16,fp8,15,0.02014880031347275
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,8,128,1,float16,float16,31,0.01927199959754944
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,8,128,1,float16,fp8,31,0.020454399287700653
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,8,128,1,float16,float16,63,0.019545599818229675
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,8,128,1,float16,fp8,63,0.02019200026988983
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,8,128,1,float16,float16,127,0.01955839991569519
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,8,128,1,float16,fp8,127,0.020336000621318816
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,8,128,1,float16,float16,255,0.021006399393081666
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,8,128,1,float16,fp8,255,0.02215680032968521
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,8,128,1,float16,float16,511,0.024358400702476503
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,8,128,1,float16,fp8,511,0.025780799984931945
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,8,128,1,float16,float16,1023,0.02566719949245453
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,8,128,1,float16,fp8,1023,0.02566559910774231
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,8,128,1,float16,float16,2047,0.02787039875984192
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,128,8,128,1,float16,fp8,2047,0.029425600171089174
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,1,128,1,float16,float16,1,0.023596799373626708
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,1,128,1,float16,fp8,1,0.0239439994096756
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,1,128,1,float16,float16,3,0.023606400191783904
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,1,128,1,float16,fp8,3,0.02407519966363907
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,1,128,1,float16,float16,7,0.023131200671195985
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,1,128,1,float16,fp8,7,0.024022400379180908
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,1,128,1,float16,float16,15,0.02333440035581589
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,1,128,1,float16,fp8,15,0.024187199771404266
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,1,128,1,float16,float16,31,0.023921599984169005
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,1,128,1,float16,fp8,31,0.024604800343513488
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,1,128,1,float16,float16,63,0.023588800430297853
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,1,128,1,float16,fp8,63,0.024184000492095948
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,1,128,1,float16,float16,127,0.024083200097084045
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,1,128,1,float16,fp8,127,0.024636800587177276
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,1,128,1,float16,float16,255,0.02550239861011505
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,1,128,1,float16,fp8,255,0.026655998826026917
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,1,128,1,float16,float16,511,0.030060800909996032
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,1,128,1,float16,fp8,511,0.03019680082798004
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,1,128,1,float16,float16,1023,0.047328001260757445
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,1,128,1,float16,fp8,1023,0.043561598658561705
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,2,128,1,float16,float16,1,0.026689600944519044
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,2,128,1,float16,fp8,1,0.028472000360488893
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,2,128,1,float16,float16,3,0.02678079903125763
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,2,128,1,float16,fp8,3,0.0289247989654541
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,2,128,1,float16,float16,7,0.026956799626350402
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,2,128,1,float16,fp8,7,0.028591999411582948
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,2,128,1,float16,float16,15,0.02677919864654541
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,2,128,1,float16,fp8,15,0.028726398944854736
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,2,128,1,float16,float16,31,0.027020800113677978
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,2,128,1,float16,fp8,31,0.028679999709129333
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,2,128,1,float16,float16,63,0.026796799898147584
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,2,128,1,float16,fp8,63,0.028916800022125246
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,2,128,1,float16,float16,127,0.027447998523712158
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,2,128,1,float16,fp8,127,0.028764799237251282
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,2,128,1,float16,float16,255,0.03136639893054962
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,2,128,1,float16,fp8,255,0.03272320032119751
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,2,128,1,float16,float16,511,0.0460752010345459
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,2,128,1,float16,fp8,511,0.04298399984836578
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,2,128,1,float16,float16,1023,0.06610080003738403
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,2,128,1,float16,fp8,1023,0.06325439810752868
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,4,128,1,float16,float16,1,0.03450720012187958
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,4,128,1,float16,fp8,1,0.03771519958972931
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,4,128,1,float16,float16,3,0.034536001086235044
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,4,128,1,float16,fp8,3,0.037859201431274414
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,4,128,1,float16,float16,7,0.034513598680496214
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,4,128,1,float16,fp8,7,0.037648001313209535
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,4,128,1,float16,float16,15,0.03461759984493255
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,4,128,1,float16,fp8,15,0.037806400656700136
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,4,128,1,float16,float16,31,0.034836798906326294
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,4,128,1,float16,fp8,31,0.037836799025535585
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,4,128,1,float16,float16,63,0.034703999757766724
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,4,128,1,float16,fp8,63,0.03797439932823181
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,4,128,1,float16,float16,127,0.038796800374984744
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,4,128,1,float16,fp8,127,0.037950399518013
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,4,128,1,float16,float16,255,0.051292800903320314
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,4,128,1,float16,fp8,255,0.04814079999923706
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,4,128,1,float16,float16,511,0.06590880155563354
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,4,128,1,float16,fp8,511,0.06699360013008118
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,4,128,1,float16,float16,1023,0.10287679433822632
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,4,128,1,float16,fp8,1023,0.09660159945487976
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,8,128,1,float16,float16,1,0.04940640032291412
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,8,128,1,float16,fp8,1,0.05526080131530762
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,8,128,1,float16,float16,3,0.049377599358558656
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,8,128,1,float16,fp8,3,0.05559999942779541
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,8,128,1,float16,float16,7,0.04915040135383606
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,8,128,1,float16,fp8,7,0.055902397632598876
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,8,128,1,float16,float16,15,0.04890399873256683
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,8,128,1,float16,fp8,15,0.0558031976222992
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,8,128,1,float16,float16,31,0.049399998784065244
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,8,128,1,float16,fp8,31,0.05546720027923584
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,8,128,1,float16,float16,63,0.052167999744415286
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,8,128,1,float16,fp8,63,0.055878400802612305
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,8,128,1,float16,float16,127,0.058169597387313844
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,8,128,1,float16,fp8,127,0.058404797315597536
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,8,128,1,float16,float16,255,0.07455360293388366
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,8,128,1,float16,fp8,255,0.0790560007095337
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,8,128,1,float16,float16,511,0.10373599529266357
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,8,128,1,float16,fp8,511,0.10705280303955078
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,8,128,1,float16,float16,1023,0.17243679761886596
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,128,8,128,1,float16,fp8,1023,0.1618783950805664
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,1,128,1,float16,float16,1,0.03018240034580231
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,1,128,1,float16,fp8,1,0.032393598556518556
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,1,128,1,float16,float16,3,0.030801600217819212
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,1,128,1,float16,fp8,3,0.03197120130062103
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,1,128,1,float16,float16,7,0.030375999212265015
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,1,128,1,float16,fp8,7,0.03177599906921387
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,1,128,1,float16,float16,15,0.030516800284385682
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,1,128,1,float16,fp8,15,0.032380801439285276
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,1,128,1,float16,float16,31,0.030353599786758424
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,1,128,1,float16,fp8,31,0.03234719932079315
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,1,128,1,float16,float16,63,0.030507200956344606
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,1,128,1,float16,fp8,63,0.03221920132637024
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,1,128,1,float16,float16,127,0.03115679919719696
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,1,128,1,float16,fp8,127,0.03216640055179596
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,1,128,1,float16,float16,255,0.041044801473617554
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,1,128,1,float16,fp8,255,0.036206400394439696
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,2,128,1,float16,float16,1,0.037457600235939026
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,2,128,1,float16,fp8,1,0.041068801283836366
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,2,128,1,float16,fp8,15,0.040915200114250184
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,2,128,1,float16,float16,3,0.03740000128746033
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,2,128,1,float16,fp8,3,0.040833601355552675
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,2,128,1,float16,fp8,31,0.04098080098628998
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,2,128,1,float16,float16,7,0.037360000610351565
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,2,128,1,float16,fp8,7,0.04072319865226746
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,2,128,1,float16,float16,15,0.03768959939479828
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,2,128,1,float16,float16,31,0.037513598799705505
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,2,128,1,float16,float16,63,0.03947519958019256
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,2,128,1,float16,fp8,63,0.04105440080165863
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,2,128,1,float16,float16,127,0.04519200026988983
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,2,128,1,float16,fp8,127,0.041115200519561766
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,2,128,1,float16,float16,255,0.0562175989151001
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,2,128,1,float16,fp8,255,0.0542415976524353
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,4,128,1,float16,float16,1,0.05214560031890869
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,4,128,1,float16,fp8,1,0.05848479866981506
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,4,128,1,float16,float16,3,0.05219519734382629
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,4,128,1,float16,fp8,3,0.058924800157546996
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,4,128,1,float16,float16,7,0.05222880244255066
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,4,128,1,float16,fp8,7,0.058500802516937254
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,4,128,1,float16,float16,15,0.05229120254516602
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,4,128,1,float16,fp8,15,0.058748799562454226
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,4,128,1,float16,float16,31,0.05305119752883911
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,4,128,1,float16,fp8,31,0.05879999995231629
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,4,128,1,float16,float16,63,0.05963039994239807
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,4,128,1,float16,fp8,63,0.05968639850616455
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,4,128,1,float16,float16,127,0.06262080073356628
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,4,128,1,float16,fp8,127,0.06479520201683045
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,4,128,1,float16,float16,255,0.07961599826812744
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,4,128,1,float16,fp8,255,0.08217120170593262
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,8,128,1,float16,float16,1,0.08101919889450074
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,8,128,1,float16,fp8,1,0.09492639899253845
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,8,128,1,float16,float16,3,0.08091359734535217
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,8,128,1,float16,fp8,3,0.0940176010131836
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,8,128,1,float16,float16,7,0.08092799782752991
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,8,128,1,float16,fp8,7,0.09438719749450683
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,8,128,1,float16,float16,15,0.081140798330307
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,8,128,1,float16,fp8,15,0.09491040110588074
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,8,128,1,float16,float16,31,0.08318719863891602
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,8,128,1,float16,fp8,31,0.09552479982376098
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,8,128,1,float16,float16,63,0.09163839817047119
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,8,128,1,float16,fp8,63,0.10195360183715821
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,8,128,1,float16,float16,127,0.09424160122871399
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,8,128,1,float16,fp8,127,0.1074288010597229
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,8,128,1,float16,float16,255,0.1238927960395813
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,128,8,128,1,float16,fp8,255,0.1346943974494934
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,1,128,1,float16,float16,1,0.04636960029602051
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,1,128,1,float16,fp8,1,0.04902079999446869
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,1,128,1,float16,float16,3,0.04625760018825531
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,1,128,1,float16,fp8,3,0.04904319941997528
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,1,128,1,float16,float16,7,0.046387198567390445
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,1,128,1,float16,fp8,7,0.04888800084590912
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,1,128,1,float16,fp8,63,0.049348801374435425
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,1,128,1,float16,float16,15,0.047809600830078125
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,1,128,1,float16,fp8,15,0.04886879920959473
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,1,128,1,float16,float16,31,0.04661279916763306
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,1,128,1,float16,fp8,31,0.04901120066642761
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,1,128,1,float16,float16,63,0.05345119833946228
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,1,128,1,float16,float16,127,0.056948798894882205
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,1,128,1,float16,fp8,127,0.054232001304626465
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,2,128,1,float16,float16,1,0.059601598978042604
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,2,128,1,float16,fp8,1,0.066048002243042
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,2,128,1,float16,float16,3,0.06066399812698364
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,2,128,1,float16,fp8,3,0.06606559753417969
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,2,128,1,float16,float16,7,0.060844802856445314
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,2,128,1,float16,fp8,7,0.06649119853973388
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,2,128,1,float16,float16,15,0.06134719848632812
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,2,128,1,float16,fp8,15,0.06667839884757995
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,2,128,1,float16,float16,31,0.06359360218048096
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,2,128,1,float16,fp8,31,0.06720960140228271
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,2,128,1,float16,fp8,63,0.07137600183486939
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,2,128,1,float16,float16,63,0.06899039745330811
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,2,128,1,float16,float16,127,0.07266240119934082
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,2,128,1,float16,fp8,127,0.07610080242156983
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,4,128,1,float16,float16,1,0.08908799886703492
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,4,128,1,float16,fp8,1,0.10085920095443726
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,4,128,1,float16,float16,3,0.08797600269317626
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,4,128,1,float16,fp8,3,0.10108799934387207
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,4,128,1,float16,float16,7,0.09079200029373169
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,4,128,1,float16,fp8,7,0.10134400129318237
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,4,128,1,float16,fp8,63,0.11095520257949829
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,4,128,1,float16,float16,15,0.09345759749412537
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,4,128,1,float16,fp8,15,0.10177439451217651
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,4,128,1,float16,float16,31,0.09745919704437256
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,4,128,1,float16,fp8,31,0.10410079956054688
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,4,128,1,float16,float16,63,0.10014400482177735
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,8,128,1,float16,fp8,3,0.1711184024810791
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,4,128,1,float16,float16,127,0.10386240482330322
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,4,128,1,float16,fp8,127,0.11529760360717774
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,8,128,1,float16,float16,1,0.14728000164031982
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,8,128,1,float16,fp8,1,0.17148799896240235
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,8,128,1,float16,float16,3,0.14638240337371827
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,8,128,1,float16,float16,7,0.1488095998764038
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,8,128,1,float16,float16,15,0.1548095941543579
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,8,128,1,float16,fp8,7,0.16928479671478272
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,8,128,1,float16,fp8,15,0.17269439697265626
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,8,128,1,float16,float16,31,0.15834239721298218
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,8,128,1,float16,fp8,31,0.18076000213623047
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,8,128,1,float16,float16,63,0.15928159952163695
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,8,128,1,float16,fp8,63,0.18516000509262084
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,8,128,1,float16,float16,127,0.16460479497909547
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,1,128,1,float16,float16,1,0.019625599682331085
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,128,8,128,1,float16,fp8,127,0.19403840303421022
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,1,128,1,float16,fp8,1,0.020233599841594695
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,1,128,1,float16,float16,3,0.019726400077342988
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,1,128,1,float16,fp8,3,0.020503999292850496
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,1,128,1,float16,float16,7,0.01961279958486557
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,1,128,1,float16,fp8,7,0.020473599433898926
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,1,128,1,float16,float16,15,0.019366399943828584
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,1,128,1,float16,fp8,15,0.0205487996339798
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,1,128,1,float16,float16,31,0.01953279972076416
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,1,128,1,float16,fp8,31,0.020688000321388244
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,1,128,1,float16,float16,63,0.019592000544071196
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,1,128,1,float16,fp8,63,0.020427200198173522
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,1,128,1,float16,float16,127,0.019499200582504272
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,1,128,1,float16,fp8,127,0.02014079988002777
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,1,128,1,float16,float16,255,0.021406400203704833
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,1,128,1,float16,fp8,255,0.02210240066051483
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,1,128,1,float16,float16,511,0.024379199743270873
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,1,128,1,float16,fp8,511,0.026148799061775207
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,1,128,1,float16,float16,1023,0.0248416006565094
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,1,128,1,float16,fp8,1023,0.02585279941558838
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,1,128,1,float16,float16,2047,0.025598400831222536
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,1,128,1,float16,fp8,2047,0.026416000723838807
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,2,128,1,float16,float16,1,0.019439999759197236
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,2,128,1,float16,fp8,1,0.020499199628829956
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,2,128,1,float16,float16,3,0.019547200202941893
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,2,128,1,float16,fp8,3,0.020100800693035124
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,2,128,1,float16,float16,7,0.019407999515533448
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,2,128,1,float16,fp8,7,0.020231999456882477
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,2,128,1,float16,float16,15,0.019655999541282655
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,2,128,1,float16,fp8,15,0.020151999592781068
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,2,128,1,float16,float16,31,0.019500799477100372
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,2,128,1,float16,fp8,31,0.020260800421237946
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,2,128,1,float16,float16,63,0.019595199823379518
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,2,128,1,float16,fp8,63,0.020078399777412416
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,2,128,1,float16,float16,127,0.019414399564266206
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,2,128,1,float16,fp8,127,0.02014400064945221
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,2,128,1,float16,float16,255,0.021356800198554994
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,2,128,1,float16,fp8,255,0.022494399547576906
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,2,128,1,float16,float16,511,0.024244800209999084
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,2,128,1,float16,fp8,511,0.025763198733329773
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,2,128,1,float16,float16,1023,0.024672000110149382
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,2,128,1,float16,fp8,1023,0.02542400062084198
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,2,128,1,float16,float16,2047,0.02659359872341156
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,2,128,1,float16,fp8,2047,0.026468798518180847
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,4,128,1,float16,float16,1,0.01977760046720505
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,4,128,1,float16,fp8,1,0.020503999292850496
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,4,128,1,float16,float16,3,0.019568000733852387
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,4,128,1,float16,fp8,3,0.020363199710845947
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,4,128,1,float16,float16,7,0.019679999351501463
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,4,128,1,float16,fp8,7,0.02034880071878433
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,4,128,1,float16,float16,15,0.019728000462055206
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,4,128,1,float16,fp8,15,0.02045599967241287
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,4,128,1,float16,float16,31,0.019513599574565887
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,4,128,1,float16,fp8,31,0.02044160068035126
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,4,128,1,float16,float16,63,0.019760000705718993
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,4,128,1,float16,fp8,63,0.020399999618530274
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,4,128,1,float16,float16,127,0.019859200716018675
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,4,128,1,float16,fp8,127,0.020235200226306916
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,4,128,1,float16,float16,255,0.02140959948301315
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,4,128,1,float16,fp8,255,0.022737599909305573
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,4,128,1,float16,fp8,2047,0.029809600114822386
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,4,128,1,float16,float16,511,0.02468159943819046
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,4,128,1,float16,fp8,511,0.026521599292755126
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,4,128,1,float16,float16,1023,0.02584800124168396
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,4,128,1,float16,fp8,1023,0.025971201062202454
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,4,128,1,float16,float16,2047,0.027857598662376405
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,8,128,1,float16,float16,1,0.019812799990177155
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,8,128,1,float16,fp8,1,0.02083359956741333
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,8,128,1,float16,float16,3,0.01974080055952072
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,8,128,1,float16,fp8,3,0.020623999834060668
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,8,128,1,float16,float16,7,0.019827200472354888
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,8,128,1,float16,fp8,7,0.02080959975719452
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,8,128,1,float16,float16,15,0.019844800233840942
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,8,128,1,float16,fp8,15,0.02070080041885376
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,8,128,1,float16,float16,31,0.019950400292873382
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,8,128,1,float16,fp8,31,0.020865599811077117
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,8,128,1,float16,float16,63,0.019947199523448943
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,8,128,1,float16,fp8,63,0.020654399693012238
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,8,128,1,float16,float16,127,0.019964799284934998
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,8,128,1,float16,fp8,127,0.02083519995212555
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,8,128,1,float16,float16,255,0.0217631995677948
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,8,128,1,float16,fp8,255,0.02264000028371811
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,8,128,1,float16,float16,511,0.024616000056266785
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,8,128,1,float16,fp8,511,0.026163199543952943
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,8,128,1,float16,float16,1023,0.027752000093460082
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,8,128,1,float16,fp8,1023,0.029659199714660644
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,8,128,1,float16,float16,2047,0.04108000099658966
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,128,8,128,1,float16,fp8,2047,0.03524479866027832
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,1,128,1,float16,float16,1,0.07872639894485474
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,1,128,1,float16,fp8,1,0.08267040252685547
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,1,128,1,float16,float16,3,0.07888479828834534
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,1,128,1,float16,fp8,3,0.08382560014724731
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,1,128,1,float16,float16,7,0.07947520017623902
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,1,128,1,float16,fp8,7,0.08406239748001099
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,1,128,1,float16,float16,15,0.08028799891471863
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,1,128,1,float16,fp8,15,0.08406720161437989
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,1,128,1,float16,float16,31,0.08053600192070007
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,1,128,1,float16,fp8,31,0.08473439812660218
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,1,128,1,float16,float16,63,0.08187519907951354
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,1,128,1,float16,fp8,63,0.08597599864006042
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,2,128,1,float16,float16,1,0.10665600299835205
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,2,128,1,float16,fp8,1,0.1184224009513855
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,2,128,1,float16,float16,3,0.10689120292663574
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,2,128,1,float16,fp8,3,0.11857279539108276
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,2,128,1,float16,float16,7,0.106113600730896
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,2,128,1,float16,fp8,7,0.11862239837646485
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,2,128,1,float16,float16,15,0.10717120170593261
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,2,128,1,float16,fp8,15,0.1191167950630188
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,2,128,1,float16,float16,31,0.10926400423049927
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,2,128,1,float16,fp8,31,0.12026879787445069
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,2,128,1,float16,float16,63,0.11054879426956177
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,2,128,1,float16,fp8,63,0.12222399711608886
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,4,128,1,float16,float16,1,0.16387360095977782
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,4,128,1,float16,fp8,1,0.18969440460205078
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,4,128,1,float16,float16,3,0.16446239948272706
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,4,128,1,float16,fp8,3,0.1896864056587219
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,4,128,1,float16,float16,7,0.1646783947944641
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,4,128,1,float16,fp8,31,0.19215199947357178
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,4,128,1,float16,fp8,7,0.18869119882583618
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,4,128,1,float16,float16,15,0.1650928020477295
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,4,128,1,float16,fp8,15,0.1892400026321411
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,4,128,1,float16,float16,31,0.16717439889907837
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,4,128,1,float16,float16,63,0.16987520456314087
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,4,128,1,float16,fp8,63,0.19517279863357545
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,8,128,1,float16,float16,1,0.2879728078842163
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,8,128,1,float16,fp8,1,0.33824160099029543
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,8,128,1,float16,float16,3,0.28902559280395507
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,8,128,1,float16,fp8,3,0.3359055995941162
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,8,128,1,float16,float16,7,0.2880847930908203
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,8,128,1,float16,fp8,7,0.339300799369812
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,8,128,1,float16,float16,15,0.28931679725646975
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,8,128,1,float16,fp8,63,0.3397455930709839
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,8,128,1,float16,fp8,15,0.34005439281463623
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,8,128,1,float16,float16,31,0.2917104005813599
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,8,128,1,float16,fp8,31,0.33968319892883303
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,128,8,128,1,float16,float16,63,0.29332480430603025
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,128,1,128,1,float16,fp8,7,0.13873599767684935
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,128,1,128,1,float16,float16,1,0.1271407961845398
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,128,1,128,1,float16,fp8,1,0.13860000371932985
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,128,1,128,1,float16,float16,3,0.12796000242233277
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,128,1,128,1,float16,fp8,3,0.13773119449615479
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,128,1,128,1,float16,float16,7,0.1275696039199829
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,128,1,128,1,float16,float16,15,0.1285663962364197
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,128,1,128,1,float16,fp8,15,0.13968000411987305
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,128,1,128,1,float16,float16,31,0.13068799972534179
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,128,1,128,1,float16,fp8,31,0.13922079801559448
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,128,2,128,1,float16,float16,1,0.1816975951194763
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,128,2,128,1,float16,fp8,1,0.20685598850250245
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,128,2,128,1,float16,float16,3,0.18088799715042114
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,128,2,128,1,float16,fp8,3,0.20698080062866211
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,128,2,128,1,float16,float16,7,0.18086559772491456
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,128,2,128,1,float16,fp8,7,0.2067552089691162
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,128,2,128,1,float16,float16,15,0.1823359966278076
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,128,2,128,1,float16,fp8,15,0.2075648069381714
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,128,2,128,1,float16,float16,31,0.1834015965461731
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,128,2,128,1,float16,fp8,31,0.20951199531555176
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,128,4,128,1,float16,float16,1,0.2979887962341309
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,128,4,128,1,float16,fp8,7,0.35085599422454833
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,128,4,128,1,float16,fp8,1,0.351529598236084
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,128,4,128,1,float16,float16,3,0.29849920272827146
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,128,4,128,1,float16,fp8,3,0.3508239984512329
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,128,4,128,1,float16,float16,7,0.29925758838653566
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,128,4,128,1,float16,float16,15,0.3010560035705566
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,128,4,128,1,float16,fp8,15,0.3521696090698242
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,128,4,128,1,float16,float16,31,0.3010080099105835
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,128,4,128,1,float16,fp8,31,0.3546047925949097
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,128,8,128,1,float16,float16,1,0.5428224086761475
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,128,8,128,1,float16,fp8,1,0.6363872051239013
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,128,8,128,1,float16,float16,3,0.5417679786682129
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,128,8,128,1,float16,fp8,3,0.6388895988464356
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,128,8,128,1,float16,float16,7,0.543126392364502
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,128,8,128,1,float16,float16,15,0.5381103992462158
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,128,8,128,1,float16,fp8,7,0.6415647983551025
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,128,8,128,1,float16,fp8,15,0.6393807888031006
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,1,128,1,float16,float16,1,0.02005600035190582
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,128,8,128,1,float16,float16,31,0.548363208770752
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,1,128,1,float16,fp8,1,0.020603199303150178
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,128,8,128,1,float16,fp8,31,0.6392367839813232
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,1,128,1,float16,float16,3,0.01968960016965866
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,1,128,1,float16,fp8,3,0.02051199972629547
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,1,128,1,float16,float16,7,0.019990399479866028
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,1,128,1,float16,fp8,7,0.020585599541664123
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,1,128,1,float16,float16,15,0.019713599979877473
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,1,128,1,float16,fp8,15,0.02066880017518997
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,1,128,1,float16,float16,31,0.019726400077342988
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,1,128,1,float16,fp8,31,0.0205375999212265
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,1,128,1,float16,float16,63,0.019556799530982973
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,1,128,1,float16,fp8,63,0.020552000403404234
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,1,128,1,float16,float16,127,0.019944000244140624
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,1,128,1,float16,fp8,127,0.02051520049571991
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,1,128,1,float16,float16,255,0.02162719964981079
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,1,128,1,float16,fp8,255,0.022620800137519836
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,1,128,1,float16,float16,511,0.024249599874019624
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,1,128,1,float16,fp8,511,0.02640799880027771
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,1,128,1,float16,float16,1023,0.025832000374794006
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,1,128,1,float16,fp8,1023,0.026958400011062623
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,1,128,1,float16,float16,2047,0.027816000580787658
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,1,128,1,float16,fp8,2047,0.02815679907798767
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,2,128,1,float16,float16,1,0.019801600277423857
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,2,128,1,float16,fp8,1,0.02069759964942932
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,2,128,1,float16,float16,3,0.01972000002861023
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,2,128,1,float16,fp8,3,0.02083359956741333
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,2,128,1,float16,float16,7,0.019808000326156615
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,2,128,1,float16,fp8,7,0.02067359983921051
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,2,128,1,float16,float16,15,0.019871999323368073
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,2,128,1,float16,fp8,15,0.02056799978017807
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,2,128,1,float16,float16,31,0.019652800261974336
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,2,128,1,float16,fp8,31,0.020468799769878386
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,2,128,1,float16,float16,63,0.019888000190258028
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,2,128,1,float16,fp8,63,0.020619200170040132
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,2,128,1,float16,float16,127,0.019713599979877473
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,2,128,1,float16,fp8,127,0.020479999482631683
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,2,128,1,float16,float16,255,0.021721599996089934
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,2,128,1,float16,fp8,1023,0.026859200000762938
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,2,128,1,float16,fp8,255,0.022700800001621245
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,2,128,1,float16,float16,511,0.024825599789619446
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,2,128,1,float16,fp8,511,0.026587200164794923
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,2,128,1,float16,float16,1023,0.02650400102138519
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,2,128,1,float16,float16,2047,0.028968000411987306
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,2,128,1,float16,fp8,2047,0.030632001161575318
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,4,128,1,float16,float16,1,0.020024000108242034
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,4,128,1,float16,fp8,1,0.02078240066766739
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,4,128,1,float16,float16,3,0.01996160000562668
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,4,128,1,float16,fp8,3,0.02077440023422241
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,4,128,1,float16,float16,7,0.020134399831295013
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,4,128,1,float16,fp8,31,0.020688000321388244
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,4,128,1,float16,fp8,7,0.021014399826526642
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,4,128,1,float16,float16,15,0.01979680061340332
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,4,128,1,float16,fp8,15,0.021096000075340272
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,4,128,1,float16,float16,31,0.019823999702930452
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,4,128,1,float16,float16,63,0.0200655996799469
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,4,128,1,float16,float16,127,0.02011200040578842
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,4,128,1,float16,fp8,63,0.020665599405765532
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,4,128,1,float16,fp8,127,0.020683200657367708
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,4,128,1,float16,float16,255,0.02190079987049103
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,4,128,1,float16,fp8,255,0.022763200104236603
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,4,128,1,float16,float16,511,0.02476480007171631
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,4,128,1,float16,fp8,511,0.026771199703216553
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,4,128,1,float16,float16,1023,0.029692798852920532
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,4,128,1,float16,fp8,1023,0.02991040050983429
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,4,128,1,float16,float16,2047,0.04153119921684265
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,4,128,1,float16,fp8,2047,0.035872000455856326
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,8,128,1,float16,float16,1,0.02035840004682541
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,8,128,1,float16,fp8,1,0.02109760046005249
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,8,128,1,float16,float16,3,0.020468799769878386
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,8,128,1,float16,fp8,3,0.021057599782943727
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,8,128,1,float16,float16,7,0.02019840031862259
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,8,128,1,float16,fp8,7,0.021012799441814424
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,8,128,1,float16,float16,15,0.020529599487781526
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,8,128,1,float16,fp8,15,0.021347199380397797
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,8,128,1,float16,float16,31,0.020278400182723998
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,8,128,1,float16,fp8,31,0.021264000236988066
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,8,128,1,float16,float16,63,0.02028000056743622
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,8,128,1,float16,fp8,63,0.021244800090789794
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,8,128,1,float16,float16,127,0.020659199357032774
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,8,128,1,float16,fp8,127,0.021247999370098115
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,8,128,1,float16,float16,255,0.022249600291252135
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,8,128,1,float16,fp8,255,0.023345600068569183
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,8,128,1,float16,float16,511,0.02563839852809906
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,1,128,1,float16,fp8,1,0.021910400688648225
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,8,128,1,float16,fp8,511,0.02727999985218048
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,8,128,1,float16,float16,1023,0.04058080017566681
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,8,128,1,float16,fp8,1023,0.037601599097251893
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,8,128,1,float16,float16,2047,0.056304001808166505
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,128,8,128,1,float16,fp8,2047,0.05362240076065063
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,1,128,1,float16,float16,1,0.02112479954957962
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,1,128,1,float16,float16,3,0.021142399311065672
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,1,128,1,float16,fp8,3,0.021814399957656862
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,1,128,1,float16,float16,7,0.02115039974451065
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,1,128,1,float16,fp8,7,0.021891200542449953
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,1,128,1,float16,float16,15,0.02141920030117035
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,1,128,1,float16,fp8,15,0.02234559953212738
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,1,128,1,float16,float16,31,0.021321600675582884
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,1,128,1,float16,fp8,31,0.022006399929523468
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,1,128,1,float16,float16,63,0.021488000452518464
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,1,128,1,float16,fp8,63,0.022089600563049316
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,1,128,1,float16,float16,127,0.02163359969854355
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,1,128,1,float16,fp8,127,0.02189279943704605
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,1,128,1,float16,float16,255,0.02319999933242798
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,1,128,1,float16,fp8,255,0.02404319941997528
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,1,128,1,float16,float16,511,0.026292800903320312
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,1,128,1,float16,fp8,511,0.02754240036010742
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,1,128,1,float16,float16,1023,0.03544000089168549
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,1,128,1,float16,fp8,1023,0.03357920050621033
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,1,128,1,float16,float16,2047,0.047147199511528015
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,1,128,1,float16,fp8,2047,0.044361600279808046
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,2,128,1,float16,float16,1,0.021561600267887115
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,2,128,1,float16,fp8,1,0.02234880030155182
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,2,128,1,float16,float16,3,0.021388800442218782
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,2,128,1,float16,fp8,3,0.0220208004117012
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,2,128,1,float16,float16,7,0.02133760005235672
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,2,128,1,float16,fp8,7,0.0221791997551918
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,2,128,1,float16,float16,15,0.02150080054998398
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,2,128,1,float16,fp8,15,0.022068800032138826
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,2,128,1,float16,float16,31,0.02156960070133209
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,2,128,1,float16,fp8,31,0.02244960069656372
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,2,128,1,float16,float16,63,0.021329599618911742
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,2,128,1,float16,fp8,63,0.02245440036058426
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,2,128,1,float16,float16,127,0.021587200462818146
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,2,128,1,float16,fp8,127,0.022065599262714387
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,2,128,1,float16,float16,255,0.023425599932670592
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,2,128,1,float16,fp8,255,0.024451200664043427
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,2,128,1,float16,float16,511,0.026876801252365114
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,2,128,1,float16,fp8,511,0.028059199452400208
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,2,128,1,float16,float16,1023,0.042752000689506534
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,2,128,1,float16,fp8,1023,0.0378464013338089
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,4,128,1,float16,float16,3,0.025380799174308778
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,2,128,1,float16,float16,2047,0.05835840106010437
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,2,128,1,float16,fp8,2047,0.05562719702720642
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,4,128,1,float16,float16,1,0.025067201256752013
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,4,128,1,float16,fp8,1,0.027049601078033447
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,4,128,1,float16,fp8,3,0.02689119875431061
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,4,128,1,float16,float16,7,0.025174400210380553
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,4,128,1,float16,fp8,7,0.027014398574829103
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,4,128,1,float16,float16,15,0.025198400020599365
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,4,128,1,float16,fp8,15,0.026918399333953857
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,4,128,1,float16,float16,31,0.025169599056243896
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,4,128,1,float16,fp8,31,0.027142399549484254
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,4,128,1,float16,float16,63,0.025187200307846068
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,4,128,1,float16,fp8,63,0.02691200077533722
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,4,128,1,float16,float16,127,0.025467199087142945
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,4,128,1,float16,fp8,127,0.026796799898147584
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,4,128,1,float16,float16,255,0.028939199447631837
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,4,128,1,float16,fp8,255,0.030857598781585692
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,4,128,1,float16,float16,511,0.04306080043315887
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,4,128,1,float16,fp8,511,0.03953279852867127
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,4,128,1,float16,float16,1023,0.06170240044593811
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,4,128,1,float16,fp8,1023,0.0606112003326416
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,4,128,1,float16,float16,2047,0.09443519711494446
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,4,128,1,float16,fp8,2047,0.0844319999217987
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,8,128,1,float16,float16,1,0.032604798674583435
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,8,128,1,float16,fp8,1,0.03610079884529114
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,8,128,1,float16,float16,3,0.032836800813674925
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,8,128,1,float16,fp8,3,0.035980799794197084
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,8,128,1,float16,float16,7,0.03289119899272919
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,8,128,1,float16,fp8,7,0.035955199599266054
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,8,128,1,float16,float16,15,0.033102399110794066
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,8,128,1,float16,fp8,15,0.03602400124073028
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,8,128,1,float16,float16,31,0.0328000009059906
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,8,128,1,float16,fp8,31,0.03627519905567169
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,8,128,1,float16,float16,63,0.03271200060844422
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,8,128,1,float16,fp8,63,0.03600960075855255
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,8,128,1,float16,float16,127,0.03352479934692383
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,8,128,1,float16,fp8,127,0.036083200573921205
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,8,128,1,float16,float16,255,0.048974400758743285
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,8,128,1,float16,fp8,255,0.0447488009929657
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,8,128,1,float16,float16,511,0.062089598178863524
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,8,128,1,float16,fp8,511,0.06517919898033142
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,8,128,1,float16,float16,1023,0.09826560020446777
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,8,128,1,float16,fp8,1023,0.093094402551651
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,8,128,1,float16,float16,2047,0.15969120264053344
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,96,8,128,1,float16,fp8,2047,0.14192960262298585
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,1,128,1,float16,float16,1,0.01648000031709671
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,1,128,1,float16,fp8,1,0.01712159961462021
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,1,128,1,float16,float16,3,0.016655999422073364
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,1,128,1,float16,fp8,3,0.017319999635219574
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,1,128,1,float16,float16,7,0.016620799899101257
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,1,128,1,float16,fp8,7,0.01761920005083084
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,1,128,1,float16,float16,15,0.016672000288963318
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,1,128,1,float16,fp8,15,0.017502400279045104
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,1,128,1,float16,float16,31,0.016607999801635742
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,1,128,1,float16,fp8,31,0.017195199429988862
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,1,128,1,float16,float16,63,0.01658080071210861
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,1,128,1,float16,fp8,63,0.017294399440288544
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,1,128,1,float16,float16,127,0.01645279973745346
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,1,128,1,float16,fp8,127,0.017500799894332886
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,1,128,1,float16,float16,255,0.01823039948940277
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,1,128,1,float16,fp8,255,0.019540800154209136
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,1,128,1,float16,float16,511,0.021169599890708924
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,1,128,1,float16,fp8,511,0.023324799537658692
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,1,128,1,float16,float16,1023,0.02141280025243759
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,1,128,1,float16,fp8,1023,0.022968000173568724
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,1,128,1,float16,float16,2047,0.021819199621677398
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,1,128,1,float16,fp8,2047,0.02266719937324524
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,2,128,1,float16,float16,1,0.017419199645519256
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,2,128,1,float16,fp8,1,0.017844800651073457
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,2,128,1,float16,float16,3,0.01735360026359558
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,2,128,1,float16,fp8,3,0.017841599881649017
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,2,128,1,float16,float16,7,0.017236800491809846
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,2,128,1,float16,fp8,7,0.017824000120162962
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,2,128,1,float16,float16,15,0.017027199268341064
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,2,128,1,float16,fp8,15,0.01812639981508255
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,2,128,1,float16,float16,31,0.01732960045337677
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,2,128,1,float16,fp8,31,0.0178384006023407
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,2,128,1,float16,float16,511,0.021848000586032867
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,2,128,1,float16,float16,63,0.017403200268745422
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,2,128,1,float16,fp8,63,0.01783200055360794
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,2,128,1,float16,float16,127,0.01725600063800812
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,2,128,1,float16,fp8,127,0.01786559969186783
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,2,128,1,float16,float16,255,0.018857599794864656
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,2,128,1,float16,fp8,255,0.019913600385189058
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,2,128,1,float16,fp8,511,0.02407519966363907
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,2,128,1,float16,float16,1023,0.02168319970369339
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,2,128,1,float16,fp8,1023,0.022862400114536285
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,2,128,1,float16,float16,2047,0.02197919934988022
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,2,128,1,float16,fp8,2047,0.023161600530147552
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,4,128,1,float16,float16,1,0.018811200559139252
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,4,128,1,float16,fp8,1,0.019543999433517457
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,4,128,1,float16,float16,3,0.01897120028734207
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,4,128,1,float16,fp8,3,0.019556799530982973
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,4,128,1,float16,float16,7,0.01887679994106293
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,4,128,1,float16,fp8,7,0.019891199469566346
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,4,128,1,float16,float16,15,0.0188960000872612
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,4,128,1,float16,fp8,15,0.019894400238990785
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,4,128,1,float16,float16,31,0.018836799263954162
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,4,128,1,float16,fp8,31,0.019939200580120088
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,4,128,1,float16,float16,63,0.018742400407791137
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,4,128,1,float16,fp8,63,0.01963520050048828
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,4,128,1,float16,float16,127,0.018985599279403687
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,4,128,1,float16,float16,1023,0.023052799701690673
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,4,128,1,float16,fp8,127,0.019555200636386872
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,4,128,1,float16,float16,255,0.02034880071878433
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,4,128,1,float16,fp8,255,0.021831999719142913
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,4,128,1,float16,float16,511,0.023419199883937834
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,4,128,1,float16,fp8,511,0.025455999374389648
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,4,128,1,float16,fp8,1023,0.024719999730587007
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,4,128,1,float16,float16,2047,0.023393599689006804
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,4,128,1,float16,fp8,2047,0.025051200389862062
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,8,128,1,float16,float16,1,0.019006399810314177
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,8,128,1,float16,fp8,1,0.019937600195407867
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,8,128,1,float16,float16,3,0.01896799951791763
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,8,128,1,float16,fp8,3,0.01958079934120178
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,8,128,1,float16,float16,7,0.018985599279403687
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,8,128,1,float16,fp8,7,0.019868800044059755
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,8,128,1,float16,float16,15,0.01868959963321686
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,8,128,1,float16,fp8,15,0.019836799800395967
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,8,128,1,float16,float16,31,0.019094400107860565
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,8,128,1,float16,fp8,31,0.02016319930553436
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,8,128,1,float16,float16,63,0.018935999274253844
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,8,128,1,float16,fp8,63,0.019857600331306458
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,8,128,1,float16,float16,127,0.019044800102710722
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,8,128,1,float16,fp8,127,0.019988800585269927
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,8,128,1,float16,float16,255,0.02065120041370392
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,8,128,1,float16,fp8,255,0.02157440036535263
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,8,128,1,float16,float16,511,0.023817600309848787
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,8,128,1,float16,fp8,511,0.02542720139026642
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,8,128,1,float16,float16,1023,0.023307199776172637
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,8,128,1,float16,fp8,1023,0.024694399535655977
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,8,128,1,float16,float16,2047,0.02372640073299408
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,96,8,128,1,float16,fp8,2047,0.02499520033597946
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,1,128,1,float16,float16,1,0.01748639941215515
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,1,128,1,float16,fp8,1,0.018009600043296815
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,1,128,1,float16,float16,3,0.01717599928379059
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,1,128,1,float16,fp8,3,0.0181536003947258
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,1,128,1,float16,float16,7,0.017417599260807038
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,1,128,1,float16,fp8,7,0.018456000089645385
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,1,128,1,float16,float16,15,0.01736160069704056
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,1,128,1,float16,fp8,15,0.018352000415325163
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,1,128,1,float16,float16,31,0.01735360026359558
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,1,128,1,float16,fp8,31,0.0182096004486084
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,1,128,1,float16,float16,63,0.017395199835300447
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,1,128,1,float16,fp8,63,0.018174399435520173
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,1,128,1,float16,float16,127,0.017291200160980225
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,1,128,1,float16,fp8,127,0.01825280040502548
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,1,128,1,float16,float16,255,0.018939200043678283
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,1,128,1,float16,fp8,255,0.020153599977493285
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,1,128,1,float16,float16,511,0.02196960002183914
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,1,128,1,float16,fp8,511,0.02401120066642761
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,1,128,1,float16,float16,1023,0.022305600345134735
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,1,128,1,float16,fp8,1023,0.02351839989423752
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,1,128,1,float16,float16,2047,0.022543999552726745
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,1,128,1,float16,fp8,2047,0.023996800184249878
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,2,128,1,float16,float16,1,0.019019199907779692
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,2,128,1,float16,fp8,1,0.019687999784946442
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,2,128,1,float16,float16,3,0.018824000656604768
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,2,128,1,float16,fp8,3,0.01971839964389801
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,2,128,1,float16,float16,7,0.018929600715637207
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,2,128,1,float16,fp8,7,0.019592000544071196
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,2,128,1,float16,float16,15,0.019113600254058838
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,2,128,1,float16,fp8,15,0.01974239945411682
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,2,128,1,float16,float16,255,0.020983999967575072
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,2,128,1,float16,float16,31,0.018931199610233308
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,2,128,1,float16,fp8,31,0.01983039975166321
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,2,128,1,float16,float16,63,0.019198399782180787
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,2,128,1,float16,fp8,63,0.019596800208091736
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,2,128,1,float16,float16,127,0.018985599279403687
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,2,128,1,float16,fp8,127,0.019926400482654573
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,2,128,1,float16,fp8,255,0.021793599426746368
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,2,128,1,float16,float16,511,0.02390879988670349
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,2,128,1,float16,fp8,511,0.0254256010055542
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,2,128,1,float16,float16,1023,0.02359360009431839
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,2,128,1,float16,fp8,1023,0.025022399425506592
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,2,128,1,float16,float16,2047,0.02428320050239563
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,2,128,1,float16,fp8,2047,0.025249600410461426
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,4,128,1,float16,float16,1,0.019094400107860565
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,4,128,1,float16,fp8,1,0.019875200092792512
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,4,128,1,float16,float16,3,0.019116799533367156
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,4,128,1,float16,fp8,3,0.02003999948501587
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,4,128,1,float16,float16,7,0.019108800590038298
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,4,128,1,float16,fp8,7,0.019732800126075745
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,4,128,1,float16,float16,15,0.01913599967956543
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,4,128,1,float16,fp8,15,0.02005600035190582
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,4,128,1,float16,float16,31,0.019017599523067474
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,4,128,1,float16,fp8,31,0.019835199415683746
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,4,128,1,float16,float16,63,0.019100800156593323
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,4,128,1,float16,fp8,63,0.02011680006980896
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,4,128,1,float16,float16,127,0.019233599305152893
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,4,128,1,float16,fp8,127,0.01956000030040741
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,4,128,1,float16,float16,255,0.020576000213623047
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,4,128,1,float16,fp8,255,0.02215680032968521
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,4,128,1,float16,float16,511,0.023721599578857423
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,4,128,1,float16,fp8,511,0.02560639977455139
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,4,128,1,float16,float16,1023,0.023622399568557738
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,4,128,1,float16,fp8,1023,0.02484800070524216
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,4,128,1,float16,float16,2047,0.02447360008955002
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,4,128,1,float16,fp8,2047,0.025436800718307496
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,8,128,1,float16,float16,1,0.01918399930000305
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,8,128,1,float16,fp8,1,0.019908800721168518
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,8,128,1,float16,float16,3,0.019358399510383605
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,8,128,1,float16,fp8,3,0.01996160000562668
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,8,128,1,float16,float16,7,0.019388799369335175
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,8,128,1,float16,fp8,7,0.01980320066213608
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,8,128,1,float16,float16,15,0.019307200610637665
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,8,128,1,float16,fp8,15,0.020024000108242034
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,8,128,1,float16,float16,31,0.01934240013360977
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,8,128,1,float16,fp8,31,0.019934399425983428
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,8,128,1,float16,float16,63,0.019278399646282196
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,8,128,1,float16,fp8,63,0.020032000541687012
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,8,128,1,float16,float16,127,0.019534400105476378
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,8,128,1,float16,fp8,127,0.02003519982099533
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,8,128,1,float16,float16,255,0.020838400721549986
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,8,128,1,float16,fp8,255,0.02191839963197708
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,8,128,1,float16,float16,511,0.023787200450897217
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,8,128,1,float16,fp8,511,0.02584159970283508
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,8,128,1,float16,float16,1023,0.02377600073814392
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,8,128,1,float16,fp8,1023,0.025036799907684325
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,8,128,1,float16,float16,2047,0.025940799713134767
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,96,8,128,1,float16,fp8,2047,0.026144000887870788
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,1,128,1,float16,float16,1,0.019990399479866028
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,1,128,1,float16,fp8,1,0.02109439969062805
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,1,128,1,float16,float16,3,0.02011840045452118
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,1,128,1,float16,fp8,3,0.02122880071401596
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,1,128,1,float16,float16,7,0.020175999402999877
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,1,128,1,float16,fp8,7,0.02101919949054718
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,1,128,1,float16,float16,15,0.020068800449371337
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,1,128,1,float16,fp8,15,0.021454399824142455
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,1,128,1,float16,float16,31,0.019947199523448943
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,1,128,1,float16,fp8,31,0.020868800580501556
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,1,128,1,float16,float16,63,0.02019200026988983
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,1,128,1,float16,fp8,63,0.02086080014705658
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,1,128,1,float16,float16,127,0.02050720006227493
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,1,128,1,float16,fp8,127,0.02119999974966049
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,1,128,1,float16,float16,255,0.02189760059118271
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,1,128,1,float16,fp8,255,0.023148800432682037
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,1,128,1,float16,float16,511,0.025143998861312866
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,1,128,1,float16,fp8,511,0.02656320035457611
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,1,128,1,float16,float16,1023,0.02784799933433533
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,1,128,1,float16,fp8,1023,0.02794399857521057
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,1,128,1,float16,float16,2047,0.03269439935684204
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,1,128,1,float16,fp8,2047,0.031763198971748355
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,2,128,1,float16,float16,1,0.020321600139141083
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,2,128,1,float16,fp8,1,0.021350400149822236
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,2,128,1,float16,float16,3,0.02059199959039688
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,2,128,1,float16,fp8,3,0.021243199706077576
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,2,128,1,float16,float16,7,0.020627200603485107
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,2,128,1,float16,fp8,7,0.02115360051393509
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,2,128,1,float16,float16,15,0.020313599705696107
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,2,128,1,float16,fp8,15,0.02126079946756363
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,2,128,1,float16,float16,31,0.020227199792861937
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,2,128,1,float16,fp8,31,0.02125120013952255
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,2,128,1,float16,float16,63,0.02036159932613373
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,2,128,1,float16,fp8,63,0.02107519954442978
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,2,128,1,float16,float16,127,0.020502400398254395
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,2,128,1,float16,fp8,127,0.021457600593566894
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,2,128,1,float16,float16,255,0.02213120013475418
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,2,128,1,float16,fp8,255,0.023148800432682037
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,2,128,1,float16,fp8,511,0.026976001262664796
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,2,128,1,float16,float16,511,0.025383999943733214
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,2,128,1,float16,float16,1023,0.03012799918651581
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,2,128,1,float16,fp8,1023,0.03081600069999695
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,2,128,1,float16,float16,2047,0.043033599853515625
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,2,128,1,float16,fp8,2047,0.03703039884567261
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,4,128,1,float16,float16,1,0.020755200088024138
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,4,128,1,float16,fp8,1,0.02150239944458008
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,4,128,1,float16,float16,3,0.020851199328899384
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,4,128,1,float16,fp8,3,0.021588799357414246
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,4,128,1,float16,float16,7,0.020875200629234314
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,4,128,1,float16,fp8,7,0.021537600457668303
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,4,128,1,float16,float16,15,0.020820799469947814
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,4,128,1,float16,fp8,15,0.02144159972667694
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,4,128,1,float16,float16,31,0.020692799985408784
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,4,128,1,float16,fp8,31,0.021374399960041045
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,4,128,1,float16,float16,63,0.020790399610996248
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,4,128,1,float16,fp8,63,0.02149440050125122
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,4,128,1,float16,float16,127,0.02101760059595108
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,4,128,1,float16,fp8,127,0.021793599426746368
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,4,128,1,float16,float16,255,0.022782400250434875
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,4,128,1,float16,float16,511,0.025931200385093688
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,4,128,1,float16,fp8,255,0.023582400381565095
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,4,128,1,float16,fp8,511,0.027447998523712158
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,4,128,1,float16,float16,1023,0.04098080098628998
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,4,128,1,float16,fp8,1023,0.035183998942375186
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,4,128,1,float16,float16,2047,0.056806397438049314
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,4,128,1,float16,fp8,2047,0.054606401920318605
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,8,128,1,float16,float16,1,0.02463040053844452
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,8,128,1,float16,fp8,1,0.026111999154090883
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,8,128,1,float16,float16,3,0.02446399927139282
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,8,128,1,float16,fp8,3,0.025966399908065797
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,8,128,1,float16,float16,7,0.024875199794769286
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,8,128,1,float16,fp8,7,0.026105600595474242
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,8,128,1,float16,float16,15,0.024702399969100952
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,8,128,1,float16,fp8,15,0.026212799549102783
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,8,128,1,float16,float16,31,0.02481279969215393
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,8,128,1,float16,fp8,31,0.026132801175117494
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,8,128,1,float16,float16,63,0.024560000002384185
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,8,128,1,float16,fp8,63,0.02640480101108551
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,8,128,1,float16,float16,127,0.02481279969215393
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,8,128,1,float16,fp8,127,0.02638239860534668
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,8,128,1,float16,float16,255,0.028387200832366944
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,8,128,1,float16,fp8,255,0.03030720055103302
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,8,128,1,float16,float16,511,0.04091359972953797
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,8,128,1,float16,fp8,511,0.03846080005168915
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,8,128,1,float16,float16,1023,0.059303998947143555
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,8,128,1,float16,fp8,1023,0.058676797151565555
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,8,128,1,float16,float16,2047,0.09197120070457458
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,96,8,128,1,float16,fp8,2047,0.08302080035209655
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,1,128,1,float16,float16,1,0.01908160001039505
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,1,128,1,float16,fp8,1,0.020003199577331543
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,1,128,1,float16,float16,3,0.01934559941291809
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,1,128,1,float16,fp8,3,0.019823999702930452
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,1,128,1,float16,float16,7,0.019232000410556793
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,1,128,1,float16,fp8,7,0.020284800231456755
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,1,128,1,float16,float16,15,0.019363200664520262
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,1,128,1,float16,fp8,15,0.020139199495315552
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,1,128,1,float16,float16,31,0.01916320025920868
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,1,128,1,float16,fp8,31,0.01991039961576462
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,1,128,1,float16,float16,63,0.01928640007972717
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,1,128,1,float16,fp8,63,0.02003999948501587
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,1,128,1,float16,float16,127,0.019120000302791595
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,1,128,1,float16,fp8,127,0.01993599981069565
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,1,128,1,float16,float16,255,0.020803199708461763
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,1,128,1,float16,fp8,255,0.022305600345134735
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,1,128,1,float16,float16,511,0.023852799832820893
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,1,128,1,float16,fp8,511,0.02566240131855011
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,1,128,1,float16,float16,1023,0.024223999679088594
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,1,128,1,float16,fp8,1023,0.025440001487731935
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,1,128,1,float16,float16,2047,0.024292799830436706
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,1,128,1,float16,fp8,2047,0.02558079957962036
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,2,128,1,float16,float16,1,0.019206400215625762
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,2,128,1,float16,fp8,1,0.019852800667285918
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,2,128,1,float16,float16,3,0.01945919990539551
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,2,128,1,float16,fp8,3,0.0200655996799469
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,2,128,1,float16,float16,7,0.019230400025844575
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,2,128,1,float16,fp8,7,0.01997919976711273
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,2,128,1,float16,float16,15,0.01932000070810318
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,2,128,1,float16,fp8,15,0.020047999918460846
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,2,128,1,float16,float16,31,0.019310399889945984
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,2,128,1,float16,fp8,31,0.019971199333667755
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,2,128,1,float16,float16,63,0.019108800590038298
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,2,128,1,float16,fp8,63,0.019788800179958342
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,2,128,1,float16,float16,127,0.019300800561904908
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,2,128,1,float16,fp8,127,0.019944000244140624
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,2,128,1,float16,float16,255,0.020871999859809875
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,2,128,1,float16,fp8,255,0.021817600727081297
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,2,128,1,float16,float16,511,0.0237184002995491
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,2,128,1,float16,fp8,511,0.025646400451660157
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,2,128,1,float16,float16,1023,0.023998400568962096
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,2,128,1,float16,fp8,1023,0.025174400210380553
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,2,128,1,float16,float16,2047,0.024465599656105043
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,2,128,1,float16,fp8,2047,0.025588798522949218
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,4,128,1,float16,float16,1,0.01905920058488846
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,4,128,1,float16,fp8,1,0.020206399261951447
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,4,128,1,float16,float16,3,0.019172799587249757
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,4,128,1,float16,fp8,3,0.01985599994659424
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,4,128,1,float16,float16,7,0.019256000220775605
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,4,128,1,float16,fp8,7,0.020147199928760528
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,4,128,1,float16,float16,15,0.019278399646282196
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,4,128,1,float16,fp8,15,0.020214399695396422
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,4,128,1,float16,float16,31,0.019424000382423402
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,4,128,1,float16,fp8,31,0.019886399805545806
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,4,128,1,float16,float16,63,0.01929119974374771
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,4,128,1,float16,fp8,63,0.019948799908161164
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,4,128,1,float16,float16,127,0.019257600605487823
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,4,128,1,float16,fp8,127,0.020127999782562255
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,4,128,1,float16,float16,255,0.02104319930076599
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,4,128,1,float16,fp8,255,0.022009600698947907
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,4,128,1,float16,float16,511,0.023918400704860687
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,4,128,1,float16,fp8,511,0.025710400938987733
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,4,128,1,float16,float16,1023,0.024009600281715393
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,4,128,1,float16,fp8,1023,0.025065600872039795
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,4,128,1,float16,float16,2047,0.02581599950790405
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,4,128,1,float16,fp8,2047,0.0259552001953125
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,8,128,1,float16,float16,1,0.019548800587654114
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,8,128,1,float16,fp8,1,0.020286400616168977
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,8,128,1,float16,float16,3,0.019443200528621675
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,8,128,1,float16,fp8,3,0.020212799310684204
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,8,128,1,float16,float16,7,0.01950719952583313
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,8,128,1,float16,fp8,7,0.02060000002384186
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,8,128,1,float16,float16,15,0.01955839991569519
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,8,128,1,float16,fp8,15,0.020151999592781068
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,8,128,1,float16,float16,31,0.019415999948978423
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,8,128,1,float16,fp8,31,0.020388799905776977
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,8,128,1,float16,float16,63,0.019630399346351624
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,8,128,1,float16,fp8,63,0.020207999646663664
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,8,128,1,float16,float16,127,0.01936960071325302
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,8,128,1,float16,fp8,127,0.020284800231456755
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,8,128,1,float16,float16,255,0.02112800031900406
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,8,128,1,float16,fp8,255,0.02220800071954727
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,8,128,1,float16,float16,511,0.024532799422740937
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,8,128,1,float16,fp8,511,0.026228800415992737
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,8,128,1,float16,float16,1023,0.02555040121078491
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,8,128,1,float16,fp8,1023,0.02585279941558838
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,8,128,1,float16,float16,2047,0.027544000744819643
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,96,8,128,1,float16,fp8,2047,0.02956799864768982
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,1,128,1,float16,float16,1,0.022860799729824067
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,1,128,1,float16,fp8,1,0.02388159930706024
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,1,128,1,float16,float16,3,0.022896000742912294
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,1,128,1,float16,fp8,3,0.023708799481391908
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,1,128,1,float16,float16,7,0.022780799865722658
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,1,128,1,float16,fp8,7,0.02359520047903061
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,1,128,1,float16,float16,15,0.022910399734973906
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,1,128,1,float16,fp8,15,0.023668800294399262
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,1,128,1,float16,float16,31,0.02335200011730194
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,1,128,1,float16,fp8,31,0.023870399594306944
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,1,128,1,float16,float16,63,0.022915199398994446
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,1,128,1,float16,fp8,63,0.0237744003534317
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,1,128,1,float16,float16,127,0.023550400137901308
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,1,128,1,float16,fp8,127,0.0237184002995491
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,1,128,1,float16,float16,255,0.025179201364517213
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,1,128,1,float16,fp8,255,0.02650879919528961
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,1,128,1,float16,float16,511,0.02828960120677948
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,1,128,1,float16,fp8,511,0.029659199714660644
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,1,128,1,float16,float16,1023,0.045516800880432126
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,1,128,1,float16,fp8,1023,0.04201439917087555
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,2,128,1,float16,float16,1,0.02686559855937958
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,2,128,1,float16,fp8,1,0.02821280062198639
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,2,128,1,float16,float16,3,0.026955199241638184
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,2,128,1,float16,fp8,3,0.028279998898506166
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,2,128,1,float16,float16,7,0.02661919891834259
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,2,128,1,float16,fp8,7,0.02826879918575287
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,2,128,1,float16,float16,15,0.026516801118850707
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,2,128,1,float16,fp8,15,0.028491199016571045
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,2,128,1,float16,float16,255,0.03136000037193298
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,2,128,1,float16,float16,31,0.026740801334381104
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,2,128,1,float16,fp8,31,0.02852480113506317
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,2,128,1,float16,float16,63,0.02656480073928833
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,2,128,1,float16,fp8,63,0.028387200832366944
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,2,128,1,float16,float16,127,0.026824000477790832
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,2,128,1,float16,fp8,127,0.028454399108886717
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,2,128,1,float16,fp8,255,0.03254239857196808
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,2,128,1,float16,float16,511,0.04568159878253937
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,2,128,1,float16,fp8,511,0.04134880006313324
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,2,128,1,float16,float16,1023,0.06525440216064453
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,2,128,1,float16,fp8,1023,0.06255840063095093
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,4,128,1,float16,float16,1,0.03428960144519806
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,4,128,1,float16,fp8,1,0.03748640120029449
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,4,128,1,float16,float16,3,0.03442080020904541
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,4,128,1,float16,fp8,3,0.03745599985122681
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,4,128,1,float16,float16,7,0.034281599521636966
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,4,128,1,float16,fp8,7,0.037544000148773196
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,4,128,1,float16,float16,15,0.03468480110168457
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,4,128,1,float16,fp8,15,0.03752799928188324
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,4,128,1,float16,float16,31,0.03458400070667267
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,4,128,1,float16,fp8,31,0.03759520053863526
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,4,128,1,float16,float16,63,0.03449119925498963
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,4,128,1,float16,fp8,63,0.037931200861930844
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,4,128,1,float16,float16,127,0.03688960075378418
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,4,128,1,float16,fp8,127,0.03793759942054749
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,4,128,1,float16,float16,255,0.05045440196990967
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,4,128,1,float16,fp8,255,0.046691200137138365
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,4,128,1,float16,float16,511,0.0658847987651825
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,4,128,1,float16,fp8,511,0.06655679941177368
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,4,128,1,float16,float16,1023,0.10209920406341552
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,4,128,1,float16,fp8,1023,0.0954367995262146
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,8,128,1,float16,float16,1,0.048860800266265866
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,8,128,1,float16,fp8,1,0.05529119968414307
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,8,128,1,float16,float16,3,0.04950399994850159
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,8,128,1,float16,fp8,3,0.05544319748878479
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,8,128,1,float16,float16,7,0.048824000358581546
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,8,128,1,float16,fp8,7,0.055550402402877806
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,8,128,1,float16,float16,15,0.049275198578834535
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,8,128,1,float16,fp8,15,0.05545600056648255
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,8,128,1,float16,float16,31,0.04894079864025116
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,8,128,1,float16,fp8,31,0.05550079941749573
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,8,128,1,float16,float16,63,0.05080320239067078
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,8,128,1,float16,fp8,63,0.05553119778633118
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,8,128,1,float16,float16,127,0.05878559947013855
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,8,128,1,float16,fp8,127,0.05802239775657654
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,8,128,1,float16,float16,255,0.07450399994850158
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,8,128,1,float16,fp8,255,0.07862399816513062
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,8,128,1,float16,float16,511,0.1037168025970459
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,8,128,1,float16,fp8,511,0.10781760215759277
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,8,128,1,float16,float16,1023,0.17051680088043214
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,96,8,128,1,float16,fp8,1023,0.15968159437179566
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,1,128,1,float16,float16,1,0.030011200904846193
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,1,128,1,float16,fp8,1,0.031769600510597226
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,1,128,1,float16,float16,3,0.030296000838279723
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,1,128,1,float16,fp8,3,0.0318015992641449
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,1,128,1,float16,float16,7,0.030246400833129884
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,1,128,1,float16,fp8,7,0.03208799958229065
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,1,128,1,float16,float16,15,0.030187198519706727
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,1,128,1,float16,fp8,15,0.03190079927444458
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,1,128,1,float16,float16,31,0.030118399858474733
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,1,128,1,float16,fp8,31,0.03177280128002167
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,2,128,1,float16,float16,1,0.037238401174545285
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,1,128,1,float16,float16,63,0.030052798986434936
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,1,128,1,float16,fp8,63,0.031744000315666196
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,1,128,1,float16,float16,127,0.030177599191665648
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,1,128,1,float16,fp8,127,0.031918400526046754
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,1,128,1,float16,float16,255,0.03809759914875031
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,1,128,1,float16,fp8,255,0.03614560067653656
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,2,128,1,float16,fp8,1,0.04047360122203827
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,2,128,1,float16,float16,3,0.037319999933242795
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,2,128,1,float16,fp8,3,0.040489599108695984
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,2,128,1,float16,float16,7,0.03751679956912994
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,2,128,1,float16,fp8,7,0.04052959978580475
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,2,128,1,float16,float16,15,0.03758080005645752
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,2,128,1,float16,fp8,15,0.040731200575828554
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,2,128,1,float16,float16,31,0.03726080060005188
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,2,128,1,float16,fp8,31,0.040468800067901614
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,2,128,1,float16,float16,63,0.037617599964141844
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,2,128,1,float16,fp8,63,0.040612798929214475
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,2,128,1,float16,float16,127,0.042427200078964236
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,2,128,1,float16,fp8,127,0.0418368011713028
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,2,128,1,float16,float16,255,0.055318397283554074
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,2,128,1,float16,fp8,255,0.05315200090408325
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,4,128,1,float16,float16,1,0.051926398277282716
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,4,128,1,float16,fp8,1,0.058455997705459596
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,4,128,1,float16,float16,3,0.0521232008934021
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,4,128,1,float16,fp8,3,0.05852159857749939
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,4,128,1,float16,float16,7,0.05247520208358765
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,4,128,1,float16,fp8,7,0.05840799808502197
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,4,128,1,float16,float16,15,0.052142399549484256
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,4,128,1,float16,fp8,15,0.05849760174751282
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,4,128,1,float16,float16,31,0.05200639963150024
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,4,128,1,float16,fp8,31,0.05853760242462158
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,4,128,1,float16,float16,63,0.05791360139846802
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,4,128,1,float16,fp8,63,0.059894400835037234
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,4,128,1,float16,float16,127,0.06192960143089295
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,4,128,1,float16,fp8,127,0.06391199827194213
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,4,128,1,float16,float16,255,0.0787168025970459
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,4,128,1,float16,fp8,255,0.0819920003414154
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,8,128,1,float16,float16,1,0.08062400221824646
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,8,128,1,float16,fp8,1,0.09452319741249085
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,8,128,1,float16,fp8,15,0.09474719762802124
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,8,128,1,float16,float16,3,0.08107839822769165
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,8,128,1,float16,fp8,3,0.09384480118751526
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,8,128,1,float16,float16,7,0.08096799850463868
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,8,128,1,float16,fp8,63,0.10040960311889649
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,8,128,1,float16,fp8,7,0.09387199878692627
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,8,128,1,float16,float16,15,0.08111199736595154
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,8,128,1,float16,float16,31,0.08294399976730346
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,8,128,1,float16,fp8,31,0.09462239742279052
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,8,128,1,float16,float16,63,0.09122880101203919
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,8,128,1,float16,float16,127,0.09298400282859802
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,8,128,1,float16,fp8,127,0.10623040199279785
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,8,128,1,float16,float16,255,0.1234063982963562
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,96,8,128,1,float16,fp8,255,0.13373279571533203
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,1,128,1,float16,float16,1,0.044865599274635314
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,1,128,1,float16,fp8,1,0.04828479886054993
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,1,128,1,float16,float16,3,0.04496000111103058
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,1,128,1,float16,fp8,3,0.04852960109710693
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,1,128,1,float16,float16,7,0.04473280012607574
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,1,128,1,float16,fp8,7,0.048291200399398805
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,1,128,1,float16,float16,15,0.04510239958763122
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,1,128,1,float16,fp8,15,0.048404800891876223
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,1,128,1,float16,float16,31,0.04503360092639923
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,1,128,1,float16,fp8,31,0.04828000068664551
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,1,128,1,float16,float16,63,0.048214399814605714
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,1,128,1,float16,fp8,63,0.04837439954280853
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,1,128,1,float16,float16,127,0.05484960079193115
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,1,128,1,float16,fp8,127,0.049748799204826354
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,2,128,1,float16,float16,1,0.05919359922409058
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,2,128,1,float16,fp8,1,0.06540319919586182
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,2,128,1,float16,float16,3,0.05891039967536926
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,2,128,1,float16,fp8,3,0.06499840021133423
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,2,128,1,float16,float16,7,0.05894719958305359
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,2,128,1,float16,fp8,7,0.06545600295066833
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,2,128,1,float16,float16,15,0.05905119776725769
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,2,128,1,float16,fp8,15,0.0655023992061615
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,2,128,1,float16,float16,31,0.06107360124588013
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,2,128,1,float16,fp8,31,0.0654911994934082
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,2,128,1,float16,float16,63,0.06724159717559815
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,2,128,1,float16,fp8,63,0.06628159880638122
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,2,128,1,float16,float16,127,0.07039520144462585
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,2,128,1,float16,fp8,127,0.0739408016204834
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,4,128,1,float16,float16,1,0.08805599808692932
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,4,128,1,float16,fp8,1,0.10073120594024658
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,4,128,1,float16,float16,3,0.0875711977481842
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,4,128,1,float16,fp8,3,0.10085760354995728
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,4,128,1,float16,float16,7,0.08707360029220582
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,4,128,1,float16,fp8,7,0.10066560506820679
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,4,128,1,float16,float16,15,0.08830879926681519
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,4,128,1,float16,fp8,15,0.10133440494537353
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,4,128,1,float16,float16,31,0.09432160258293151
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,4,128,1,float16,fp8,31,0.10130560398101807
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,4,128,1,float16,float16,63,0.09846879839897156
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,4,128,1,float16,fp8,63,0.11027679443359376
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,4,128,1,float16,float16,127,0.10159519910812378
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,4,128,1,float16,fp8,127,0.114136004447937
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,8,128,1,float16,float16,1,0.1429136037826538
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,8,128,1,float16,fp8,1,0.17020800113677978
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,8,128,1,float16,float16,3,0.14187359809875488
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,8,128,1,float16,fp8,3,0.17055200338363646
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,8,128,1,float16,fp8,31,0.17667200565338134
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,8,128,1,float16,float16,7,0.14433280229568482
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,8,128,1,float16,fp8,7,0.1703727960586548
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,8,128,1,float16,float16,15,0.14947520494461058
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,8,128,1,float16,fp8,15,0.16994719505310057
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,8,128,1,float16,float16,31,0.15711840391159057
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,8,128,1,float16,float16,63,0.15828800201416016
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,8,128,1,float16,fp8,63,0.18424479961395263
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,8,128,1,float16,float16,127,0.162774395942688
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,96,8,128,1,float16,fp8,127,0.19523520469665528
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,1,128,1,float16,float16,1,0.019390399754047393
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,1,128,1,float16,fp8,1,0.019964799284934998
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,1,128,1,float16,float16,3,0.019280000030994414
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,1,128,1,float16,fp8,3,0.02019840031862259
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,1,128,1,float16,float16,7,0.01940000057220459
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,1,128,1,float16,fp8,7,0.02030559927225113
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,1,128,1,float16,float16,15,0.01966399997472763
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,1,128,1,float16,fp8,15,0.020363199710845947
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,1,128,1,float16,float16,31,0.019252799451351166
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,1,128,1,float16,fp8,31,0.019972799718379973
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,1,128,1,float16,float16,63,0.019377599656581878
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,1,128,1,float16,fp8,63,0.02003840059041977
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,1,128,1,float16,float16,127,0.019438399374485014
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,1,128,1,float16,fp8,127,0.020505599677562714
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,1,128,1,float16,float16,255,0.020971199870109557
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,1,128,1,float16,fp8,255,0.022368000447750093
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,1,128,1,float16,float16,511,0.024065600335597993
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,1,128,1,float16,fp8,511,0.025710400938987733
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,1,128,1,float16,float16,1023,0.024539199471473695
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,1,128,1,float16,fp8,1023,0.025732800364494324
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,1,128,1,float16,float16,2047,0.02555040121078491
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,1,128,1,float16,fp8,2047,0.026097598671913146
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,2,128,1,float16,float16,1,0.01927199959754944
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,2,128,1,float16,fp8,1,0.020203199982643128
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,2,128,1,float16,float16,3,0.019491200149059296
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,2,128,1,float16,fp8,3,0.020371200144290925
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,2,128,1,float16,float16,7,0.019412800669670105
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,2,128,1,float16,fp8,7,0.020470400154590607
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,2,128,1,float16,float16,15,0.0195375993847847
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,2,128,1,float16,fp8,15,0.020131200551986694
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,2,128,1,float16,float16,31,0.019208000600337984
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,2,128,1,float16,fp8,31,0.020364800095558168
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,2,128,1,float16,float16,63,0.019383999705314636
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,2,128,1,float16,fp8,63,0.02024960070848465
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,2,128,1,float16,float16,127,0.019436800479888917
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,2,128,1,float16,fp8,127,0.02056639939546585
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,2,128,1,float16,float16,255,0.021372799575328828
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,2,128,1,float16,fp8,255,0.022040000557899474
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,2,128,1,float16,float16,511,0.024167999625205994
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,2,128,1,float16,fp8,511,0.02603679895401001
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,2,128,1,float16,float16,1023,0.02431679964065552
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,2,128,1,float16,fp8,1023,0.025467199087142945
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,2,128,1,float16,float16,2047,0.026159998774528504
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,2,128,1,float16,fp8,2047,0.026550400257110595
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,4,128,1,float16,float16,1,0.019702400267124175
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,4,128,1,float16,fp8,1,0.020475199818611144
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,4,128,1,float16,float16,3,0.019603200256824493
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,4,128,1,float16,fp8,3,0.020278400182723998
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,4,128,1,float16,float16,7,0.0194255992770195
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,4,128,1,float16,fp8,7,0.020703999698162077
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,4,128,1,float16,float16,15,0.019569599628448488
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,4,128,1,float16,fp8,15,0.020582400262355804
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,4,128,1,float16,float16,31,0.019681599736213685
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,4,128,1,float16,fp8,31,0.020532800257205962
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,4,128,1,float16,float16,63,0.019832000136375427
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,4,128,1,float16,fp8,63,0.020164799690246583
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,4,128,1,float16,float16,127,0.01966879963874817
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,4,128,1,float16,fp8,127,0.02043039947748184
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,4,128,1,float16,float16,255,0.02125920057296753
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,4,128,1,float16,fp8,255,0.022679999470710754
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,4,128,1,float16,float16,511,0.02483679950237274
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,4,128,1,float16,fp8,511,0.02606880068778992
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,4,128,1,float16,float16,1023,0.025908800959587096
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,4,128,1,float16,fp8,1023,0.025990399718284606
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,4,128,1,float16,float16,2047,0.02778719961643219
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,4,128,1,float16,fp8,2047,0.029583999514579774
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,8,128,1,float16,float16,1,0.019840000569820403
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,8,128,1,float16,fp8,1,0.020785599946975708
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,8,128,1,float16,float16,3,0.020001600682735442
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,8,128,1,float16,fp8,3,0.020363199710845947
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,8,128,1,float16,float16,7,0.019817599654197694
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,8,128,1,float16,fp8,7,0.02066880017518997
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,8,128,1,float16,float16,15,0.019974400103092194
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,8,128,1,float16,fp8,15,0.02062560021877289
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,8,128,1,float16,float16,31,0.019865599274635316
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,8,128,1,float16,fp8,31,0.020739200711250304
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,8,128,1,float16,float16,63,0.019945600628852846
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,8,128,1,float16,fp8,63,0.020584000647068022
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,8,128,1,float16,float16,127,0.020147199928760528
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,8,128,1,float16,fp8,127,0.02051360011100769
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,8,128,1,float16,float16,255,0.021512000262737273
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,8,128,1,float16,fp8,255,0.02256480008363724
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,8,128,1,float16,float16,511,0.024875199794769286
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,8,128,1,float16,fp8,511,0.02638239860534668
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,8,128,1,float16,float16,1023,0.02770879864692688
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,8,128,1,float16,fp8,1023,0.02941280007362366
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,8,128,1,float16,float16,2047,0.04073919951915741
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,96,8,128,1,float16,fp8,2047,0.03589600026607513
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,1,128,1,float16,float16,1,0.07543519735336304
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,1,128,1,float16,fp8,1,0.07781760096549988
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,1,128,1,float16,float16,3,0.07516480088233948
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,1,128,1,float16,fp8,3,0.07652159929275512
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,1,128,1,float16,float16,7,0.07607359886169433
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,1,128,1,float16,fp8,7,0.07685440182685851
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,1,128,1,float16,float16,15,0.07626720070838929
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,1,128,1,float16,fp8,15,0.07784640192985534
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,1,128,1,float16,float16,31,0.0771120011806488
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,1,128,1,float16,fp8,31,0.0803167998790741
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,1,128,1,float16,float16,63,0.07880799770355225
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,1,128,1,float16,fp8,63,0.08339999914169312
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,2,128,1,float16,float16,1,0.10332800149917602
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,2,128,1,float16,fp8,1,0.11183040142059326
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,2,128,1,float16,float16,3,0.10356639623641968
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,2,128,1,float16,fp8,3,0.11133439540863037
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,2,128,1,float16,float16,7,0.1041551947593689
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,2,128,1,float16,fp8,7,0.11172959804534913
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,2,128,1,float16,float16,15,0.10420000553131104
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,2,128,1,float16,fp8,63,0.11918720006942748
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,2,128,1,float16,fp8,15,0.11286400556564331
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,2,128,1,float16,float16,31,0.10490720272064209
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,2,128,1,float16,fp8,31,0.11746400594711304
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,2,128,1,float16,float16,63,0.10722559690475464
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,4,128,1,float16,float16,7,0.16171679496765137
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,4,128,1,float16,float16,1,0.16202080249786377
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,4,128,1,float16,fp8,1,0.18320480585098267
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,4,128,1,float16,float16,3,0.16197119951248168
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,4,128,1,float16,fp8,3,0.18363840579986573
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,4,128,1,float16,fp8,7,0.1848415970802307
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,4,128,1,float16,float16,15,0.16180319786071778
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,4,128,1,float16,fp8,15,0.18628959655761718
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,4,128,1,float16,float16,31,0.16512160301208495
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,4,128,1,float16,fp8,31,0.19086240530014037
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,4,128,1,float16,float16,63,0.16701120138168335
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,4,128,1,float16,fp8,63,0.1922544002532959
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,8,128,1,float16,float16,1,0.28666880130767824
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,8,128,1,float16,fp8,1,0.3287071943283081
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,8,128,1,float16,float16,3,0.28491199016571045
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,8,128,1,float16,fp8,3,0.3284111976623535
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,8,128,1,float16,float16,7,0.2871664047241211
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,8,128,1,float16,fp8,7,0.33522560596466067
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,8,128,1,float16,float16,15,0.2879024028778076
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,8,128,1,float16,fp8,15,0.3377023935317993
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,8,128,1,float16,float16,31,0.29048159122467043
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,8,128,1,float16,fp8,31,0.33868000507354734
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,8,128,1,float16,float16,63,0.29111518859863283
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,96,1,128,1,float16,float16,1,0.12324479818344117
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,96,8,128,1,float16,fp8,63,0.33974881172180177
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,96,1,128,1,float16,fp8,1,0.13492319583892823
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,96,1,128,1,float16,float16,3,0.1235152006149292
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,96,1,128,1,float16,fp8,3,0.1342479944229126
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,96,1,128,1,float16,float16,7,0.12396960258483887
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,96,1,128,1,float16,fp8,7,0.13485599756240846
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,96,1,128,1,float16,float16,15,0.12354559898376465
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,96,2,128,1,float16,float16,3,0.17704639434814454
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,96,1,128,1,float16,fp8,15,0.1352895975112915
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,96,1,128,1,float16,float16,31,0.12492799758911133
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,96,1,128,1,float16,fp8,31,0.1361039996147156
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,96,2,128,1,float16,float16,1,0.17665280103683473
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,96,2,128,1,float16,fp8,1,0.20215198993682862
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,96,2,128,1,float16,fp8,3,0.20103039741516113
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,96,2,128,1,float16,float16,7,0.1770751953125
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,96,2,128,1,float16,fp8,7,0.20263841152191162
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,96,2,128,1,float16,float16,15,0.17766239643096923
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,96,2,128,1,float16,fp8,15,0.20261600017547607
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,96,2,128,1,float16,float16,31,0.17872799634933473
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,96,2,128,1,float16,fp8,31,0.20545759201049804
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,96,4,128,1,float16,float16,1,0.2958431959152222
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,96,4,128,1,float16,fp8,1,0.3465440034866333
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,96,4,128,1,float16,float16,3,0.29563040733337403
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,96,4,128,1,float16,fp8,3,0.34817440509796144
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,96,4,128,1,float16,float16,7,0.2966655969619751
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,96,4,128,1,float16,fp8,7,0.34608800411224366
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,96,4,128,1,float16,float16,15,0.2974031925201416
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,96,4,128,1,float16,fp8,15,0.349947190284729
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,96,4,128,1,float16,float16,31,0.29934399127960204
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,96,4,128,1,float16,fp8,31,0.35057759284973145
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,96,8,128,1,float16,float16,1,0.5377888202667236
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,96,8,128,1,float16,fp8,1,0.6435071945190429
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,96,8,128,1,float16,float16,3,0.5398032188415527
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,96,8,128,1,float16,fp8,3,0.6392911911010742
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,96,8,128,1,float16,float16,7,0.5400144100189209
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,96,8,128,1,float16,fp8,7,0.6409183979034424
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,96,8,128,1,float16,float16,15,0.5386223793029785
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,96,8,128,1,float16,fp8,15,0.6365087985992431
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,1,128,1,float16,float16,1,0.019681599736213685
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,96,8,128,1,float16,float16,31,0.5408095836639404
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,1,128,1,float16,fp8,1,0.02038560062646866
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,96,8,128,1,float16,fp8,31,0.643940782546997
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,1,128,1,float16,float16,3,0.019636799395084382
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,1,128,1,float16,fp8,3,0.020436799526214598
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,1,128,1,float16,float16,7,0.01980479955673218
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,1,128,1,float16,fp8,7,0.020534400641918183
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,1,128,1,float16,float16,15,0.019499200582504272
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,1,128,1,float16,fp8,15,0.020476800203323365
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,1,128,1,float16,float16,31,0.019784000515937806
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,1,128,1,float16,fp8,31,0.020235200226306916
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,1,128,1,float16,float16,63,0.019627200067043306
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,1,128,1,float16,fp8,63,0.02038560062646866
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,1,128,1,float16,float16,127,0.0197952002286911
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,1,128,1,float16,fp8,127,0.020470400154590607
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,1,128,1,float16,float16,255,0.021639999747276307
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,1,128,1,float16,fp8,255,0.022511999309062957
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,1,128,1,float16,float16,511,0.024358400702476503
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,1,128,1,float16,fp8,511,0.026105600595474242
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,1,128,1,float16,float16,1023,0.025515198707580566
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,1,128,1,float16,fp8,1023,0.026555201411247252
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,1,128,1,float16,float16,2047,0.027292799949645997
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,1,128,1,float16,fp8,2047,0.02720319926738739
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,2,128,1,float16,float16,1,0.019753600656986236
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,2,128,1,float16,fp8,1,0.02040800005197525
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,2,128,1,float16,float16,3,0.01968960016965866
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,2,128,1,float16,fp8,3,0.02054399996995926
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,2,128,1,float16,float16,7,0.01976799964904785
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,2,128,1,float16,fp8,7,0.02045599967241287
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,2,128,1,float16,float16,15,0.019819200038909912
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,2,128,1,float16,fp8,15,0.020572799444198608
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,2,128,1,float16,float16,31,0.019702400267124175
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,2,128,1,float16,fp8,31,0.020449599623680113
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,2,128,1,float16,float16,63,0.019760000705718993
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,2,128,1,float16,fp8,63,0.020310400426387785
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,2,128,1,float16,float16,127,0.019710400700569154
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,2,128,1,float16,fp8,127,0.0206496000289917
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,2,128,1,float16,float16,255,0.02152000069618225
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,2,128,1,float16,fp8,255,0.022542400658130644
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,2,128,1,float16,float16,511,0.02481600046157837
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,2,128,1,float16,fp8,511,0.026444798707962035
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,2,128,1,float16,float16,1023,0.02640320062637329
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,2,128,1,float16,fp8,1023,0.026491200923919676
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,2,128,1,float16,float16,2047,0.028347200155258177
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,2,128,1,float16,fp8,2047,0.03028959929943085
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,4,128,1,float16,float16,1,0.01993120014667511
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,4,128,1,float16,fp8,1,0.020929600298404693
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,4,128,1,float16,float16,3,0.019753600656986236
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,4,128,1,float16,float16,7,0.02003040015697479
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,4,128,1,float16,fp8,3,0.02091359943151474
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,4,128,1,float16,fp8,7,0.020667199790477753
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,4,128,1,float16,float16,15,0.01987680047750473
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,4,128,1,float16,float16,127,0.019990399479866028
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,4,128,1,float16,fp8,15,0.020788800716400147
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,4,128,1,float16,float16,31,0.020051200687885285
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,4,128,1,float16,fp8,31,0.020718400180339814
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,4,128,1,float16,float16,63,0.019974400103092194
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,4,128,1,float16,fp8,63,0.02075680047273636
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,4,128,1,float16,fp8,127,0.020734399557113647
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,4,128,1,float16,float16,255,0.021780799329280853
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,4,128,1,float16,fp8,255,0.023107199370861052
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,4,128,1,float16,float16,511,0.02473759949207306
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,4,128,1,float16,fp8,511,0.026681599020957947
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,4,128,1,float16,float16,1023,0.028040000796318056
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,4,128,1,float16,fp8,1023,0.029849600791931153
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,4,128,1,float16,float16,2047,0.04158720076084137
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,4,128,1,float16,fp8,2047,0.03577440083026886
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,8,128,1,float16,float16,1,0.02030719965696335
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,8,128,1,float16,fp8,1,0.02136960029602051
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,8,128,1,float16,float16,3,0.020284800231456755
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,8,128,1,float16,fp8,3,0.02091519981622696
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,8,128,1,float16,float16,7,0.020070399343967437
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,8,128,1,float16,fp8,7,0.021209600567817687
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,8,128,1,float16,float16,15,0.020311999320983886
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,8,128,1,float16,fp8,15,0.02109919935464859
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,8,128,1,float16,float16,31,0.02025599926710129
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,8,128,1,float16,fp8,31,0.021161599457263945
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,8,128,1,float16,float16,63,0.02016959935426712
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,8,128,1,float16,fp8,63,0.02143840044736862
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,8,128,1,float16,float16,127,0.020212799310684204
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,8,128,1,float16,fp8,127,0.02128159999847412
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,8,128,1,float16,float16,255,0.02229759991168976
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,8,128,1,float16,fp8,255,0.023398399353027344
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,8,128,1,float16,float16,511,0.025409600138664244
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,8,128,1,float16,fp8,511,0.02685439884662628
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,8,128,1,float16,float16,1023,0.040089601278305055
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,8,128,1,float16,fp8,1023,0.03513120114803314
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,8,128,1,float16,float16,2047,0.055615997314453124
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,96,8,128,1,float16,fp8,2047,0.054123198986053465
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,1,128,1,float16,float16,1,0.021044799685478212
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,1,128,1,float16,fp8,1,0.02172800004482269
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,1,128,1,float16,float16,3,0.021220800280570985
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,1,128,1,float16,fp8,3,0.021950399875640868
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,1,128,1,float16,float16,7,0.021241599321365358
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,1,128,1,float16,fp8,7,0.022259199619293214
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,1,128,1,float16,float16,15,0.020904000103473663
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,1,128,1,float16,fp8,15,0.02204640060663223
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,1,128,1,float16,float16,31,0.02120479941368103
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,1,128,1,float16,fp8,31,0.02213920056819916
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,1,128,1,float16,float16,63,0.021558399498462676
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,1,128,1,float16,fp8,63,0.02237280011177063
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,1,128,1,float16,float16,127,0.021687999367713928
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,1,128,1,float16,fp8,127,0.022092799842357635
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,1,128,1,float16,float16,255,0.02281759977340698
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,1,128,1,float16,fp8,255,0.02420320063829422
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,1,128,1,float16,float16,511,0.025961598753929137
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,1,128,1,float16,fp8,511,0.02733440101146698
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,1,128,1,float16,float16,1023,0.031188800930976868
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,1,128,1,float16,fp8,1023,0.03223359882831574
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,1,128,1,float16,float16,2047,0.04539200067520142
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,1,128,1,float16,fp8,2047,0.03944959938526153
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,1,128,1,float16,float16,4095,0.06062399744987488
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,1,128,1,float16,fp8,4095,0.05710880160331726
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,2,128,1,float16,float16,1,0.02144639939069748
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,2,128,1,float16,fp8,1,0.02223999947309494
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,2,128,1,float16,float16,3,0.021715199947357176
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,2,128,1,float16,fp8,3,0.022303999960422517
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,2,128,1,float16,float16,7,0.021427200734615327
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,2,128,1,float16,fp8,7,0.022417600452899932
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,2,128,1,float16,float16,15,0.021465599536895752
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,2,128,1,float16,fp8,15,0.022353599965572356
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,2,128,1,float16,float16,31,0.021488000452518464
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,2,128,1,float16,fp8,31,0.022252799570560457
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,2,128,1,float16,float16,63,0.021724799275398256
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,2,128,1,float16,fp8,63,0.02248000055551529
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,2,128,1,float16,float16,127,0.021583999693393707
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,2,128,1,float16,fp8,127,0.02223840057849884
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,2,128,1,float16,float16,255,0.023582400381565095
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,2,128,1,float16,fp8,255,0.024452799558639528
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,2,128,1,float16,float16,511,0.026732799410820008
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,2,128,1,float16,fp8,511,0.028273600339889526
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,2,128,1,float16,float16,1023,0.04233439862728119
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,2,128,1,float16,fp8,1023,0.036222401261329654
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,2,128,1,float16,float16,2047,0.05814239978790283
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,2,128,1,float16,fp8,2047,0.05482400059700012
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,2,128,1,float16,float16,4095,0.09189440011978149
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,2,128,1,float16,fp8,4095,0.0806768000125885
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,4,128,1,float16,float16,1,0.0252703994512558
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,4,128,1,float16,fp8,1,0.02680639922618866
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,4,128,1,float16,float16,3,0.02529279887676239
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,4,128,1,float16,fp8,3,0.026787200570106508
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,4,128,1,float16,float16,7,0.025455999374389648
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,4,128,1,float16,fp8,7,0.026678401231765746
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,4,128,1,float16,float16,15,0.025158399343490602
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,4,128,1,float16,fp8,15,0.026956799626350402
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,4,128,1,float16,float16,31,0.025328001379966734
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,4,128,1,float16,fp8,31,0.026688000559806822
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,4,128,1,float16,float16,63,0.02531520128250122
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,4,128,1,float16,fp8,63,0.027008000016212463
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,4,128,1,float16,float16,127,0.02542079985141754
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,4,128,1,float16,fp8,127,0.027001601457595826
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,4,128,1,float16,float16,255,0.029236799478530882
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,4,128,1,float16,fp8,255,0.03125280141830444
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,4,128,1,float16,float16,511,0.04251680076122284
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,4,128,1,float16,fp8,511,0.04193120002746582
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,4,128,1,float16,float16,1023,0.06107040047645569
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,4,128,1,float16,fp8,1023,0.06011360287666321
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,4,128,1,float16,float16,2047,0.0939903974533081
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,8,128,1,float16,float16,3,0.03307200074195862
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,4,128,1,float16,fp8,2047,0.08396480083465577
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,4,128,1,float16,float16,4095,0.15544960498809815
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,8,128,1,float16,float16,1,0.03308959901332855
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,4,128,1,float16,fp8,4095,0.13495359420776368
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,8,128,1,float16,fp8,1,0.03584319949150085
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,8,128,1,float16,fp8,3,0.03591519892215729
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,8,128,1,float16,float16,7,0.03277600109577179
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,8,128,1,float16,fp8,7,0.036134400963783266
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,8,128,1,float16,float16,15,0.033051198720932005
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,8,128,1,float16,fp8,15,0.03586240112781525
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,8,128,1,float16,float16,31,0.03304159939289093
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,8,128,1,float16,fp8,31,0.03594880104064942
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,8,128,1,float16,float16,63,0.03298079967498779
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,8,128,1,float16,fp8,63,0.036057600378990175
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,8,128,1,float16,float16,127,0.03346880078315735
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,8,128,1,float16,fp8,127,0.03613120019435882
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,8,128,1,float16,float16,255,0.04887360036373138
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,8,128,1,float16,fp8,255,0.044470399618148804
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,8,128,1,float16,float16,511,0.061735999584197995
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,8,128,1,float16,fp8,511,0.06462079882621766
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,8,128,1,float16,float16,1023,0.09698560237884521
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,8,128,1,float16,fp8,1023,0.09276800155639649
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,8,128,1,float16,float16,2047,0.15862079858779907
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,8,128,1,float16,fp8,2047,0.1413583993911743
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,8,128,1,float16,float16,4095,0.28016641139984133
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,1,128,1,float16,float16,1,0.016420799493789672
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,64,8,128,1,float16,fp8,4095,0.24246559143066407
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,1,128,1,float16,fp8,1,0.017343999445438386
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,1,128,1,float16,float16,3,0.016166399419307708
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,1,128,1,float16,fp8,3,0.01719679981470108
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,1,128,1,float16,float16,7,0.016406400501728056
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,1,128,1,float16,fp8,7,0.017263999581336974
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,1,128,1,float16,float16,15,0.016492800414562227
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,1,128,1,float16,fp8,15,0.017239999771118165
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,1,128,1,float16,float16,31,0.01640480011701584
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,1,128,1,float16,fp8,31,0.017267200350761413
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,1,128,1,float16,float16,63,0.016387200355529784
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,1,128,1,float16,fp8,63,0.017215999960899352
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,1,128,1,float16,float16,127,0.01653600037097931
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,1,128,1,float16,fp8,127,0.01727679967880249
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,1,128,1,float16,float16,255,0.017998400330543517
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,1,128,1,float16,fp8,255,0.019088000059127808
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,1,128,1,float16,float16,511,0.021009600162506102
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,1,128,1,float16,fp8,511,0.023417599499225616
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,1,128,1,float16,float16,1023,0.02091359943151474
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,1,128,1,float16,fp8,1023,0.02232320010662079
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,1,128,1,float16,float16,2047,0.021143999695777894
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,2,128,1,float16,fp8,3,0.018011200428009033
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,1,128,1,float16,fp8,2047,0.022249600291252135
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,1,128,1,float16,float16,4095,0.022129599750041962
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,1,128,1,float16,fp8,4095,0.023182399570941925
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,2,128,1,float16,float16,1,0.01703680008649826
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,2,128,1,float16,fp8,1,0.018057599663734436
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,2,128,1,float16,float16,3,0.016974399983882903
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,2,128,1,float16,float16,7,0.017123199999332428
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,2,128,1,float16,fp8,7,0.018139199912548067
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,2,128,1,float16,float16,15,0.017257599532604216
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,2,128,1,float16,fp8,15,0.01791359931230545
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,2,128,1,float16,float16,31,0.01733119934797287
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,2,128,1,float16,fp8,31,0.01804320067167282
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,2,128,1,float16,float16,63,0.017286400496959686
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,2,128,1,float16,fp8,63,0.017897599935531618
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,2,128,1,float16,float16,127,0.01727519929409027
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,2,128,1,float16,fp8,127,0.017956799268722533
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,2,128,1,float16,float16,255,0.018580800294876097
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,2,128,1,float16,fp8,255,0.019944000244140624
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,2,128,1,float16,float16,511,0.021931199729442595
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,2,128,1,float16,fp8,511,0.02417439967393875
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,2,128,1,float16,float16,1023,0.021454399824142455
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,2,128,1,float16,fp8,1023,0.022771200537681578
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,2,128,1,float16,float16,2047,0.021374399960041045
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,2,128,1,float16,fp8,2047,0.02284640073776245
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,2,128,1,float16,float16,4095,0.02232639938592911
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,2,128,1,float16,fp8,4095,0.023503999412059783
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,4,128,1,float16,float16,1,0.018886399269104005
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,4,128,1,float16,fp8,1,0.019784000515937806
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,4,128,1,float16,float16,3,0.01903039962053299
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,4,128,1,float16,fp8,3,0.019644799828529357
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,4,128,1,float16,float16,7,0.018889600038528444
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,4,128,1,float16,fp8,7,0.019806399941444397
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,4,128,1,float16,float16,15,0.018984000384807586
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,4,128,1,float16,fp8,15,0.019814400374889372
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,4,128,1,float16,float16,31,0.0187376007437706
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,4,128,1,float16,fp8,31,0.019631999731063842
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,4,128,1,float16,float16,63,0.01892479956150055
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,4,128,1,float16,fp8,63,0.019817599654197694
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,4,128,1,float16,float16,127,0.0188511997461319
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,4,128,1,float16,fp8,127,0.01974720060825348
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,4,128,1,float16,float16,255,0.02011840045452118
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,4,128,1,float16,fp8,255,0.02165440022945404
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,4,128,1,float16,float16,511,0.02343039959669113
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,4,128,1,float16,fp8,511,0.025352001190185547
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,4,128,1,float16,float16,1023,0.023254400491714476
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,4,128,1,float16,fp8,1023,0.02444480061531067
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,8,128,1,float16,fp8,1,0.019758400321006776
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,4,128,1,float16,float16,2047,0.023520000278949738
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,4,128,1,float16,fp8,2047,0.02495039999485016
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,4,128,1,float16,float16,4095,0.02449440062046051
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,4,128,1,float16,fp8,4095,0.0257968008518219
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,8,128,1,float16,float16,1,0.019075199961662292
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,8,128,1,float16,float16,3,0.01903360038995743
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,8,128,1,float16,fp8,3,0.020001600682735442
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,8,128,1,float16,float16,7,0.018991999328136444
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,8,128,1,float16,fp8,7,0.019819200038909912
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,8,128,1,float16,float16,15,0.019492800533771514
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,8,128,1,float16,fp8,15,0.019947199523448943
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,8,128,1,float16,float16,31,0.019092799723148347
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,8,128,1,float16,fp8,31,0.020027199387550355
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,8,128,1,float16,float16,63,0.019145600497722626
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,8,128,1,float16,fp8,63,0.019687999784946442
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,8,128,1,float16,float16,127,0.019068799912929535
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,8,128,1,float16,fp8,127,0.019865599274635316
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,8,128,1,float16,float16,255,0.0204815998673439
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,8,128,1,float16,fp8,255,0.02178879976272583
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,8,128,1,float16,float16,511,0.023588800430297853
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,8,128,1,float16,fp8,511,0.025360000133514405
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,8,128,1,float16,float16,1023,0.023071999847888946
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,8,128,1,float16,fp8,1023,0.02463199943304062
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,8,128,1,float16,float16,2047,0.023628799617290495
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,8,128,1,float16,fp8,2047,0.025038400292396547
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,8,128,1,float16,float16,4095,0.026107200980186464
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,64,8,128,1,float16,fp8,4095,0.026241600513458252
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,1,128,1,float16,float16,1,0.017209599912166595
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,1,128,1,float16,fp8,1,0.01798879951238632
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,1,128,1,float16,float16,3,0.01706559956073761
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,1,128,1,float16,fp8,3,0.01815840005874634
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,1,128,1,float16,float16,7,0.017185600101947786
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,1,128,1,float16,fp8,7,0.017875200510025023
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,1,128,1,float16,float16,15,0.017110399901866913
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,1,128,1,float16,fp8,15,0.017983999848365784
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,1,128,1,float16,float16,31,0.017207999527454377
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,1,128,1,float16,fp8,31,0.018108800053596497
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,1,128,1,float16,float16,63,0.017371200025081635
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,1,128,1,float16,fp8,63,0.018302400410175324
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,1,128,1,float16,float16,127,0.017297600209712983
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,1,128,1,float16,fp8,127,0.01813279986381531
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,1,128,1,float16,float16,255,0.018779200315475465
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,1,128,1,float16,fp8,255,0.020032000541687012
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,1,128,1,float16,float16,511,0.02199999988079071
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,1,128,1,float16,fp8,511,0.024102400243282317
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,1,128,1,float16,float16,1023,0.021836799383163453
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,1,128,1,float16,fp8,1023,0.02300959974527359
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,1,128,1,float16,float16,2047,0.022409600019454957
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,1,128,1,float16,fp8,2047,0.023342399299144743
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,1,128,1,float16,float16,4095,0.023127999901771546
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,2,128,1,float16,float16,1,0.019065600633621217
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,1,128,1,float16,fp8,4095,0.02430880069732666
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,2,128,1,float16,fp8,1,0.01961439996957779
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,2,128,1,float16,float16,3,0.019019199907779692
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,2,128,1,float16,fp8,3,0.01957920044660568
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,2,128,1,float16,float16,7,0.01907840073108673
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,2,128,1,float16,fp8,7,0.01960960030555725
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,2,128,1,float16,float16,15,0.019121600687503813
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,2,128,1,float16,fp8,15,0.01971520036458969
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,2,128,1,float16,float16,31,0.018900799751281738
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,2,128,1,float16,fp8,31,0.01990240067243576
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,2,128,1,float16,float16,63,0.019153599441051484
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,2,128,1,float16,fp8,63,0.019735999405384064
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,2,128,1,float16,float16,127,0.019123199582099914
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,2,128,1,float16,fp8,127,0.019894400238990785
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,2,128,1,float16,float16,255,0.020558400452136992
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,2,128,1,float16,fp8,255,0.02181600034236908
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,2,128,1,float16,float16,511,0.023628799617290495
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,2,128,1,float16,fp8,511,0.02568640112876892
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,2,128,1,float16,float16,1023,0.023395200073719025
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,2,128,1,float16,fp8,1023,0.024452799558639528
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,2,128,1,float16,float16,2047,0.023798400163650514
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,2,128,1,float16,fp8,2047,0.025097599625587462
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,2,128,1,float16,float16,4095,0.024875199794769286
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,2,128,1,float16,fp8,4095,0.025886398553848267
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,4,128,1,float16,float16,1,0.019171200692653656
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,4,128,1,float16,fp8,1,0.019870400428771973
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,4,128,1,float16,float16,3,0.01905599981546402
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,4,128,1,float16,fp8,3,0.019894400238990785
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,4,128,1,float16,float16,7,0.019148799777030944
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,4,128,1,float16,fp8,7,0.020147199928760528
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,4,128,1,float16,float16,15,0.019299200177192687
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,4,128,1,float16,fp8,15,0.02004159986972809
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,4,128,1,float16,float16,31,0.01897760033607483
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,4,128,1,float16,fp8,31,0.01993599981069565
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,4,128,1,float16,float16,63,0.01908160001039505
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,4,128,1,float16,fp8,63,0.019926400482654573
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,4,128,1,float16,float16,127,0.019182400405406953
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,4,128,1,float16,fp8,127,0.020158399641513825
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,4,128,1,float16,float16,255,0.020553599298000335
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,4,128,1,float16,fp8,255,0.021910400688648225
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,4,128,1,float16,float16,511,0.023662400245666505
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,4,128,1,float16,fp8,511,0.025391998887062072
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,4,128,1,float16,float16,1023,0.02356639951467514
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,4,128,1,float16,fp8,1023,0.024828800559043886
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,4,128,1,float16,float16,2047,0.02434239983558655
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,4,128,1,float16,fp8,2047,0.025390401482582092
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,4,128,1,float16,float16,4095,0.026009601354599
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,4,128,1,float16,fp8,4095,0.026449599862098695
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,8,128,1,float16,float16,1,0.019254399836063384
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,8,128,1,float16,fp8,1,0.020017600059509276
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,8,128,1,float16,float16,3,0.019225600361824035
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,8,128,1,float16,fp8,3,0.020017600059509276
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,8,128,1,float16,float16,7,0.019351999461650848
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,8,128,1,float16,fp8,7,0.019937600195407867
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,8,128,1,float16,float16,15,0.019208000600337984
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,8,128,1,float16,fp8,15,0.019939200580120088
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,8,128,1,float16,float16,31,0.019284799695014954
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,8,128,1,float16,fp8,31,0.01987359970808029
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,8,128,1,float16,float16,63,0.019123199582099914
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,8,128,1,float16,fp8,63,0.020161600410938264
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,8,128,1,float16,float16,127,0.019420799612998963
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,8,128,1,float16,fp8,127,0.02009280025959015
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,8,128,1,float16,float16,255,0.020891200006008147
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,8,128,1,float16,fp8,255,0.02205599993467331
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,8,128,1,float16,float16,511,0.023731200397014617
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,8,128,1,float16,fp8,511,0.02566719949245453
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,8,128,1,float16,float16,1023,0.02390879988670349
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,8,128,1,float16,fp8,1023,0.024748800694942473
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,8,128,1,float16,float16,2047,0.025726398825645445
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,8,128,1,float16,fp8,2047,0.02589600086212158
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,8,128,1,float16,float16,4095,0.029150399565696716
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,64,8,128,1,float16,fp8,4095,0.029811200499534608
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,1,128,1,float16,float16,1,0.020073600113391876
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,1,128,1,float16,fp8,1,0.02112320065498352
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,1,128,1,float16,float16,3,0.020239999890327452
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,1,128,1,float16,fp8,31,0.02091200053691864
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,1,128,1,float16,fp8,3,0.021376000344753267
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,1,128,1,float16,float16,7,0.02030719965696335
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,1,128,1,float16,fp8,7,0.021374399960041045
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,1,128,1,float16,float16,15,0.019985599815845488
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,1,128,1,float16,fp8,15,0.02088959962129593
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,1,128,1,float16,float16,31,0.020155200362205507
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,1,128,1,float16,float16,63,0.01995519995689392
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,1,128,1,float16,fp8,63,0.021328000724315642
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,1,128,1,float16,float16,127,0.020479999482631683
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,1,128,1,float16,fp8,127,0.021252800524234772
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,1,128,1,float16,float16,255,0.022014400362968443
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,1,128,1,float16,fp8,255,0.02311359941959381
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,1,128,1,float16,float16,511,0.02550719976425171
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,1,128,1,float16,fp8,511,0.02664799988269806
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,1,128,1,float16,float16,1023,0.027139198780059815
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,1,128,1,float16,fp8,1023,0.0272271990776062
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,1,128,1,float16,float16,2047,0.029475200176239013
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,1,128,1,float16,fp8,2047,0.031079998612403868
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,1,128,1,float16,float16,4095,0.043350398540496826
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,1,128,1,float16,fp8,4095,0.042203199863433835
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,2,128,1,float16,float16,1,0.020467199385166168
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,2,128,1,float16,fp8,1,0.02117439955472946
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,2,128,1,float16,float16,3,0.020582400262355804
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,2,128,1,float16,fp8,3,0.021580800414085388
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,2,128,1,float16,float16,7,0.020534400641918183
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,2,128,1,float16,fp8,7,0.021220800280570985
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,2,128,1,float16,float16,15,0.020295999944210052
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,2,128,1,float16,fp8,15,0.021512000262737273
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,2,128,1,float16,float16,31,0.020550400018692017
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,2,128,1,float16,fp8,31,0.02110240012407303
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,2,128,1,float16,float16,63,0.02038239985704422
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,2,128,1,float16,fp8,63,0.021423999965190888
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,2,128,1,float16,float16,127,0.020611199736595153
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,2,128,1,float16,fp8,127,0.021161599457263945
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,2,128,1,float16,float16,255,0.022232000529766083
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,2,128,1,float16,fp8,255,0.023350399732589722
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,2,128,1,float16,float16,511,0.02542079985141754
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,2,128,1,float16,fp8,511,0.02688319981098175
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,2,128,1,float16,float16,1023,0.02889919877052307
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,2,128,1,float16,fp8,1023,0.030436798930168152
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,2,128,1,float16,float16,2047,0.0424591988325119
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,2,128,1,float16,fp8,2047,0.037636798620223996
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,2,128,1,float16,float16,4095,0.05876320004463196
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,2,128,1,float16,fp8,4095,0.055471998453140256
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,4,128,1,float16,float16,1,0.020588800311088562
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,4,128,1,float16,fp8,1,0.021796800196170807
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,4,128,1,float16,float16,3,0.020822399854660036
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,4,128,1,float16,fp8,3,0.021798400580883025
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,4,128,1,float16,float16,7,0.020457600057125092
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,4,128,1,float16,fp8,7,0.021835200488567352
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,4,128,1,float16,float16,15,0.020795199275016784
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,4,128,1,float16,fp8,15,0.021622399985790252
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,4,128,1,float16,float16,127,0.021107199788093566
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,4,128,1,float16,float16,31,0.020763200521469117
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,4,128,1,float16,fp8,31,0.02141920030117035
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,4,128,1,float16,float16,63,0.020710399746894835
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,4,128,1,float16,fp8,63,0.02136320024728775
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,4,128,1,float16,fp8,127,0.02208320051431656
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,4,128,1,float16,float16,255,0.02255840003490448
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,4,128,1,float16,fp8,255,0.023827199637889863
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,4,128,1,float16,float16,511,0.026017600297927858
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,4,128,1,float16,fp8,511,0.027806401252746582
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,4,128,1,float16,float16,1023,0.040575999021530154
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,4,128,1,float16,fp8,1023,0.0367680013179779
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,4,128,1,float16,float16,2047,0.05631200075149536
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,4,128,1,float16,fp8,2047,0.054574400186538696
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,4,128,1,float16,float16,4095,0.09001119732856751
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,4,128,1,float16,fp8,4095,0.07946879863739013
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,8,128,1,float16,float16,1,0.024702399969100952
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,8,128,1,float16,fp8,1,0.02611519992351532
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,8,128,1,float16,float16,3,0.024481600522994994
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,8,128,1,float16,fp8,3,0.026150399446487428
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,8,128,1,float16,float16,7,0.024929599463939668
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,8,128,1,float16,fp8,7,0.026180800795555115
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,8,128,1,float16,float16,15,0.024643200635910033
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,8,128,1,float16,fp8,15,0.026137599349021913
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,8,128,1,float16,float16,31,0.024600000679492952
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,8,128,1,float16,fp8,31,0.026126399636268616
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,8,128,1,float16,float16,63,0.02455040067434311
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,8,128,1,float16,fp8,63,0.02616640031337738
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,8,128,1,float16,float16,127,0.024806399643421174
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,8,128,1,float16,fp8,127,0.02643359899520874
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,8,128,1,float16,float16,255,0.028438401222229005
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,8,128,1,float16,fp8,255,0.03023360073566437
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,8,128,1,float16,float16,511,0.040870401263237
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,8,128,1,float16,fp8,511,0.0384880006313324
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,8,128,1,float16,float16,1023,0.05942400097846985
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,8,128,1,float16,fp8,1023,0.05902079939842224
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,8,128,1,float16,float16,2047,0.09167519807815552
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,8,128,1,float16,fp8,2047,0.08360159993171692
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,8,128,1,float16,float16,4095,0.15354399681091307
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,1,128,1,float16,float16,1,0.018913599848747253
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,64,8,128,1,float16,fp8,4095,0.13423360586166383
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,1,128,1,float16,fp8,1,0.019964799284934998
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,1,128,1,float16,float16,3,0.019068799912929535
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,1,128,1,float16,fp8,31,0.019921599328517912
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,1,128,1,float16,fp8,3,0.019815999269485473
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,1,128,1,float16,float16,7,0.019172799587249757
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,1,128,1,float16,fp8,7,0.01989919990301132
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,1,128,1,float16,float16,15,0.01918880045413971
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,1,128,1,float16,fp8,15,0.019521600008010863
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,1,128,1,float16,float16,31,0.019089600443840025
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,1,128,1,float16,float16,63,0.01915999948978424
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,1,128,1,float16,fp8,63,0.02008160054683685
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,1,128,1,float16,float16,127,0.01914079934358597
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,1,128,1,float16,fp8,127,0.01998720020055771
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,1,128,1,float16,float16,255,0.020684799551963805
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,1,128,1,float16,fp8,255,0.02173279970884323
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,1,128,1,float16,float16,511,0.023710399866104126
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,1,128,1,float16,fp8,511,0.02547839879989624
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,1,128,1,float16,float16,1023,0.023876799643039702
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,1,128,1,float16,fp8,1023,0.02510400116443634
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,1,128,1,float16,float16,2047,0.023963199555873872
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,1,128,1,float16,fp8,2047,0.025600001215934753
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,1,128,1,float16,float16,4095,0.024843199551105498
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,1,128,1,float16,fp8,4095,0.025942400097846985
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,2,128,1,float16,float16,1,0.0190528005361557
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,2,128,1,float16,float16,15,0.01913439929485321
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,2,128,1,float16,fp8,1,0.01979839950799942
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,2,128,1,float16,float16,3,0.01921759992837906
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,2,128,1,float16,fp8,3,0.019952000677585603
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,2,128,1,float16,float16,7,0.018969599902629853
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,2,128,1,float16,fp8,7,0.019875200092792512
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,2,128,1,float16,fp8,15,0.019939200580120088
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,2,128,1,float16,float16,31,0.019182400405406953
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,2,128,1,float16,fp8,31,0.019947199523448943
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,2,128,1,float16,float16,63,0.019126400351524353
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,2,128,1,float16,fp8,63,0.020180800557136537
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,2,128,1,float16,float16,127,0.018987199664115904
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,2,128,1,float16,fp8,127,0.019780799746513367
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,2,128,1,float16,float16,255,0.020761600136756896
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,2,128,1,float16,fp8,255,0.021823999285697938
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,2,128,1,float16,float16,511,0.023865599930286408
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,2,128,1,float16,fp8,511,0.025385600328445435
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,2,128,1,float16,float16,1023,0.023830400407314302
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,2,128,1,float16,fp8,1023,0.025047999620437623
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,2,128,1,float16,float16,2047,0.024275200068950654
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,2,128,1,float16,fp8,2047,0.025481599569320678
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,2,128,1,float16,float16,4095,0.026340800523757934
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,2,128,1,float16,fp8,4095,0.026416000723838807
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,4,128,1,float16,float16,1,0.01934400051832199
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,4,128,1,float16,fp8,1,0.019923199713230134
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,4,128,1,float16,float16,3,0.01927199959754944
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,4,128,1,float16,fp8,3,0.0200655996799469
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,4,128,1,float16,float16,7,0.019299200177192687
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,4,128,1,float16,fp8,7,0.019844800233840942
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,4,128,1,float16,float16,15,0.019251200556755065
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,4,128,1,float16,fp8,15,0.01991039961576462
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,4,128,1,float16,float16,31,0.019244800508022308
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,4,128,1,float16,fp8,31,0.02006399929523468
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,4,128,1,float16,float16,63,0.01937119960784912
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,4,128,1,float16,fp8,63,0.02009119987487793
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,4,128,1,float16,float16,127,0.019232000410556793
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,4,128,1,float16,fp8,127,0.020263999700546265
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,4,128,1,float16,float16,255,0.020979200303554536
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,4,128,1,float16,fp8,255,0.021931199729442595
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,4,128,1,float16,float16,511,0.02388319969177246
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,4,128,1,float16,fp8,511,0.026395198702812196
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,4,128,1,float16,float16,1023,0.023846399784088135
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,4,128,1,float16,fp8,1023,0.025177600979804992
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,4,128,1,float16,float16,2047,0.02550880014896393
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,4,128,1,float16,fp8,2047,0.025982400774955748
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,4,128,1,float16,float16,4095,0.0291920006275177
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,4,128,1,float16,fp8,4095,0.029499199986457825
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,8,128,1,float16,float16,1,0.019534400105476378
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,8,128,1,float16,fp8,1,0.020308800041675568
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,8,128,1,float16,float16,3,0.01940159946680069
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,8,128,1,float16,fp8,3,0.020283199846744537
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,8,128,1,float16,float16,7,0.019494399428367615
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,8,128,1,float16,fp8,7,0.02024960070848465
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,8,128,1,float16,float16,15,0.019254399836063384
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,8,128,1,float16,fp8,15,0.020476800203323365
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,8,128,1,float16,float16,31,0.01942239999771118
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,8,128,1,float16,fp8,31,0.020147199928760528
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,8,128,1,float16,float16,63,0.019495999813079833
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,8,128,1,float16,fp8,63,0.02024800032377243
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,8,128,1,float16,float16,127,0.019310399889945984
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,8,128,1,float16,fp8,127,0.02035519927740097
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,8,128,1,float16,float16,255,0.021086399257183076
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,8,128,1,float16,fp8,255,0.022729599475860597
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,8,128,1,float16,float16,511,0.024278399348258973
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,8,128,1,float16,fp8,511,0.025868800282478333
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,8,128,1,float16,float16,1023,0.025412800908088683
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,8,128,1,float16,fp8,1023,0.025729599595069885
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,8,128,1,float16,float16,2047,0.027752000093460082
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,8,128,1,float16,fp8,2047,0.029334399104118346
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,8,128,1,float16,float16,4095,0.040531200170516965
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,64,8,128,1,float16,fp8,4095,0.0353632003068924
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,1,128,1,float16,float16,1,0.022835199534893037
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,1,128,1,float16,fp8,1,0.02353920042514801
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,1,128,1,float16,float16,3,0.022735999524593355
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,1,128,1,float16,fp8,3,0.02359199970960617
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,1,128,1,float16,float16,7,0.023163199424743652
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,1,128,1,float16,fp8,7,0.023601600527763368
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,1,128,1,float16,float16,15,0.023387199640274046
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,1,128,1,float16,fp8,15,0.024033600091934205
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,1,128,1,float16,float16,31,0.022932800650596618
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,1,128,1,float16,fp8,31,0.02383680045604706
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,1,128,1,float16,float16,63,0.022716799378395082
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,1,128,1,float16,fp8,63,0.02359039932489395
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,1,128,1,float16,float16,127,0.023148800432682037
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,1,128,1,float16,fp8,127,0.02385440021753311
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,1,128,1,float16,float16,255,0.025275200605392456
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,1,128,1,float16,fp8,255,0.026225599646568298
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,1,128,1,float16,float16,511,0.029547199606895447
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,1,128,1,float16,fp8,511,0.029436799883842468
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,1,128,1,float16,float16,1023,0.044710400700569156
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,1,128,1,float16,fp8,1023,0.039617601037025454
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,1,128,1,float16,float16,2047,0.060864001512527466
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,1,128,1,float16,fp8,2047,0.056676799058914186
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,2,128,1,float16,float16,1,0.02690559923648834
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,2,128,1,float16,fp8,1,0.028542399406433105
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,2,128,1,float16,float16,3,0.026683199405670165
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,2,128,1,float16,fp8,3,0.02828960120677948
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,2,128,1,float16,float16,7,0.026422399282455444
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,2,128,1,float16,fp8,7,0.02808000147342682
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,2,128,1,float16,float16,15,0.026713600754737853
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,2,128,1,float16,fp8,15,0.028214401006698607
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,2,128,1,float16,float16,31,0.02698880136013031
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,2,128,1,float16,fp8,31,0.028459200263023378
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,2,128,1,float16,float16,63,0.026793599128723145
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,2,128,1,float16,fp8,63,0.028278398513793945
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,2,128,1,float16,float16,127,0.02674719989299774
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,2,128,1,float16,fp8,127,0.02847839891910553
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,2,128,1,float16,float16,255,0.030776000022888182
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,2,128,1,float16,fp8,255,0.03228960037231445
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,2,128,1,float16,float16,511,0.045371198654174806
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,2,128,1,float16,fp8,511,0.04312799870967865
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,2,128,1,float16,float16,1023,0.06411679983139038
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,2,128,1,float16,fp8,1023,0.06213279962539673
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,2,128,1,float16,float16,2047,0.09639520049095154
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,2,128,1,float16,fp8,2047,0.08660640120506287
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,4,128,1,float16,float16,1,0.03447679877281189
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,4,128,1,float16,fp8,1,0.037427198886871335
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,4,128,1,float16,float16,3,0.03462400138378143
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,4,128,1,float16,fp8,3,0.03787679970264435
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,4,128,1,float16,float16,7,0.03460319936275482
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,4,128,1,float16,fp8,7,0.03762080073356629
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,4,128,1,float16,float16,15,0.03427680134773255
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,4,128,1,float16,fp8,15,0.0377487987279892
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,4,128,1,float16,float16,31,0.03450239896774292
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,4,128,1,float16,fp8,31,0.03746559917926788
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,4,128,1,float16,float16,63,0.034595200419425966
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,4,128,1,float16,fp8,63,0.03787040114402771
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,4,128,1,float16,float16,511,0.06518719792366028
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,4,128,1,float16,float16,127,0.03473120033740997
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,4,128,1,float16,fp8,127,0.03786559998989105
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,4,128,1,float16,float16,255,0.05050560235977173
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,4,128,1,float16,fp8,255,0.04691520035266876
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,4,128,1,float16,fp8,511,0.0663312017917633
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,4,128,1,float16,float16,1023,0.1005903959274292
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,4,128,1,float16,fp8,1023,0.09421280026435852
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,8,128,1,float16,float16,3,0.04914720058441162
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,4,128,1,float16,float16,2047,0.16231679916381836
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,4,128,1,float16,fp8,2047,0.14297280311584473
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,8,128,1,float16,float16,1,0.04880160093307495
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,8,128,1,float16,fp8,1,0.055035197734832765
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,8,128,1,float16,fp8,3,0.055851197242736815
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,8,128,1,float16,float16,7,0.048553600907325745
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,8,128,1,float16,fp8,7,0.055383998155593875
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,8,128,1,float16,float16,15,0.048974400758743285
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,8,128,1,float16,fp8,15,0.05528479814529419
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,8,128,1,float16,float16,31,0.04906400144100189
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,8,128,1,float16,fp8,31,0.055435198545455935
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,8,128,1,float16,float16,63,0.05125759840011597
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,8,128,1,float16,fp8,63,0.05512639880180359
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,8,128,1,float16,float16,511,0.10376319885253907
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,8,128,1,float16,float16,127,0.05714719891548157
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,8,128,1,float16,fp8,127,0.05693920254707337
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,8,128,1,float16,float16,255,0.07416639924049377
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,8,128,1,float16,fp8,255,0.07812960147857666
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,8,128,1,float16,fp8,511,0.1075808048248291
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,8,128,1,float16,float16,1023,0.1692016005516052
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,8,128,1,float16,fp8,1023,0.15984799861907958
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,1,128,1,float16,float16,1,0.029979199171066284
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,8,128,1,float16,float16,2047,0.2922303915023804
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,64,8,128,1,float16,fp8,2047,0.2531888008117676
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,1,128,1,float16,fp8,1,0.031409600377082826
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,1,128,1,float16,float16,3,0.029943999648094178
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,1,128,1,float16,fp8,3,0.03136959969997406
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,1,128,1,float16,float16,7,0.029955199360847472
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,1,128,1,float16,fp8,7,0.03192960023880005
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,1,128,1,float16,float16,15,0.03022400140762329
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,1,128,1,float16,fp8,15,0.031995201110839845
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,1,128,1,float16,float16,31,0.02974880039691925
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,1,128,1,float16,fp8,31,0.03185920119285583
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,1,128,1,float16,float16,63,0.030273601412773132
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,1,128,1,float16,fp8,63,0.03173440098762512
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,1,128,1,float16,float16,127,0.030073601007461547
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,1,128,1,float16,fp8,127,0.03186239898204803
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,1,128,1,float16,float16,255,0.03445279896259308
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,1,128,1,float16,fp8,255,0.03635680079460144
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,1,128,1,float16,float16,511,0.04888159930706024
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,1,128,1,float16,fp8,511,0.04407039880752563
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,2,128,1,float16,float16,1,0.03751679956912994
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,2,128,1,float16,fp8,1,0.04049600064754486
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,2,128,1,float16,float16,3,0.03734720051288605
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,2,128,1,float16,fp8,3,0.040659201145172116
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,2,128,1,float16,float16,7,0.03735199868679047
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,2,128,1,float16,fp8,7,0.04084640145301819
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,2,128,1,float16,float16,15,0.037363201379776
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,2,128,1,float16,fp8,15,0.04088320136070252
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,2,128,1,float16,float16,31,0.0375216007232666
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,2,128,1,float16,fp8,31,0.04045760035514832
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,2,128,1,float16,float16,63,0.037308800220489505
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,2,128,1,float16,fp8,63,0.04083200097084046
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,2,128,1,float16,float16,511,0.06957280039787292
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,2,128,1,float16,float16,127,0.039208000898361205
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,2,128,1,float16,fp8,127,0.04130400121212006
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,2,128,1,float16,float16,255,0.053862398862838744
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,2,128,1,float16,fp8,255,0.051412802934646604
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,2,128,1,float16,fp8,511,0.07024959921836853
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,4,128,1,float16,float16,1,0.05190879702568054
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,4,128,1,float16,fp8,1,0.058310401439666745
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,4,128,1,float16,float16,3,0.052172797918319705
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,4,128,1,float16,fp8,3,0.058475202322006224
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,4,128,1,float16,float16,7,0.052190399169921874
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,4,128,1,float16,float16,63,0.055174398422241214
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,4,128,1,float16,fp8,7,0.058855998516082766
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,4,128,1,float16,float16,15,0.05243679881095886
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,4,128,1,float16,fp8,15,0.05848479866981506
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,4,128,1,float16,float16,31,0.051995199918746945
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,4,128,1,float16,fp8,31,0.05839040279388428
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,4,128,1,float16,fp8,63,0.058710402250289916
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,4,128,1,float16,float16,127,0.061289602518081666
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,4,128,1,float16,fp8,127,0.061003202199935914
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,4,128,1,float16,float16,255,0.0775983989238739
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,4,128,1,float16,fp8,255,0.08133119940757752
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,4,128,1,float16,float16,511,0.10739519596099853
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,4,128,1,float16,fp8,511,0.11030240058898926
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,8,128,1,float16,float16,1,0.08060320019721985
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,8,128,1,float16,fp8,1,0.09423199892044068
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,8,128,1,float16,float16,3,0.08103039860725403
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,8,128,1,float16,fp8,3,0.09454560279846191
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,8,128,1,float16,float16,7,0.08031039834022521
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,8,128,1,float16,fp8,7,0.09410399794578553
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,8,128,1,float16,float16,15,0.08090559840202331
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,8,128,1,float16,fp8,15,0.09413120150566101
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,8,128,1,float16,float16,31,0.08224800229072571
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,8,128,1,float16,fp8,31,0.09414719939231872
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,8,128,1,float16,float16,63,0.09049919843673707
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,8,128,1,float16,fp8,255,0.1329648017883301
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,8,128,1,float16,fp8,63,0.09658719897270203
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,8,128,1,float16,float16,127,0.09208160042762756
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,8,128,1,float16,fp8,127,0.10612159967422485
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,8,128,1,float16,float16,255,0.12291040420532226
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,8,128,1,float16,float16,511,0.18077600002288818
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,64,8,128,1,float16,fp8,511,0.19360640048980712
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,1,128,1,float16,float16,1,0.04442879855632782
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,1,128,1,float16,fp8,7,0.04760800004005432
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,1,128,1,float16,fp8,1,0.047603198885917665
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,1,128,1,float16,float16,3,0.044500800967216494
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,1,128,1,float16,fp8,3,0.04811359941959381
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,1,128,1,float16,float16,7,0.04451040029525757
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,1,128,1,float16,float16,15,0.044809600710868834
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,1,128,1,float16,fp8,15,0.04786880016326904
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,1,128,1,float16,float16,31,0.04456160068511963
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,1,128,1,float16,fp8,31,0.047942399978637695
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,1,128,1,float16,float16,63,0.045480000972747806
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,1,128,1,float16,fp8,63,0.04805760085582733
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,1,128,1,float16,float16,127,0.05181760191917419
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,1,128,1,float16,fp8,127,0.04830560088157654
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,1,128,1,float16,float16,255,0.06271039843559265
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,1,128,1,float16,fp8,255,0.0613647997379303
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,2,128,1,float16,float16,1,0.05898560285568237
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,2,128,1,float16,fp8,1,0.065038400888443
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,2,128,1,float16,float16,3,0.058976000547409056
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,2,128,1,float16,fp8,3,0.06520159840583802
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,2,128,1,float16,float16,7,0.058873599767684935
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,2,128,1,float16,fp8,7,0.06492800116539002
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,2,128,1,float16,float16,15,0.05905439853668213
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,2,128,1,float16,fp8,15,0.06530879735946656
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,2,128,1,float16,float16,31,0.059248000383377075
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,2,128,1,float16,fp8,31,0.06527519822120667
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,2,128,1,float16,float16,63,0.06536960005760192
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,2,128,1,float16,fp8,63,0.06528480052947998
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,2,128,1,float16,float16,127,0.06886559724807739
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,2,128,1,float16,fp8,127,0.07106559872627258
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,2,128,1,float16,float16,255,0.08650559782981873
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,2,128,1,float16,fp8,255,0.08669599890708923
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,4,128,1,float16,float16,1,0.0876591980457306
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,4,128,1,float16,fp8,1,0.10031360387802124
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,4,128,1,float16,float16,3,0.08807520270347595
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,4,128,1,float16,fp8,3,0.10024800300598144
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,4,128,1,float16,float16,7,0.08763359785079956
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,4,128,1,float16,fp8,7,0.1000432014465332
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,4,128,1,float16,float16,15,0.08802559971809387
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,4,128,1,float16,fp8,15,0.10052319765090942
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,4,128,1,float16,fp8,127,0.1124959945678711
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,4,128,1,float16,float16,31,0.0898256003856659
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,4,128,1,float16,fp8,31,0.10075839757919311
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,4,128,1,float16,float16,63,0.09752479791641236
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,4,128,1,float16,fp8,63,0.10814399719238281
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,4,128,1,float16,float16,127,0.10033119916915893
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,4,128,1,float16,float16,255,0.12985759973526
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,4,128,1,float16,fp8,255,0.1394991993904114
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,8,128,1,float16,float16,1,0.14125920534133912
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,8,128,1,float16,fp8,1,0.16995359659194947
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,8,128,1,float16,float16,3,0.1424623966217041
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,8,128,1,float16,fp8,3,0.17034080028533935
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,8,128,1,float16,float16,7,0.14206240177154542
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,8,128,1,float16,fp8,31,0.17230240106582642
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,8,128,1,float16,fp8,7,0.17020319700241088
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,8,128,1,float16,float16,15,0.1443727970123291
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,8,128,1,float16,fp8,15,0.1701856017112732
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,8,128,1,float16,float16,31,0.15270880460739136
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,8,128,1,float16,float16,63,0.1569823980331421
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,8,128,1,float16,fp8,63,0.1844688057899475
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,8,128,1,float16,float16,127,0.1610640048980713
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,8,128,1,float16,fp8,127,0.19164639711380005
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,8,128,1,float16,float16,255,0.21842401027679442
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,1,128,1,float16,float16,1,0.019337600469589232
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,64,8,128,1,float16,fp8,255,0.24090240001678467
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,1,128,1,float16,fp8,1,0.020147199928760528
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,1,128,1,float16,float16,3,0.019467200338840484
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,1,128,1,float16,fp8,3,0.02014559954404831
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,1,128,1,float16,float16,7,0.01953279972076416
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,1,128,1,float16,fp8,7,0.020316800475120543
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,1,128,1,float16,float16,15,0.019275200366973878
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,1,128,1,float16,fp8,15,0.019993600249290467
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,1,128,1,float16,float16,31,0.019278399646282196
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,1,128,1,float16,fp8,31,0.0200095996260643
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,1,128,1,float16,float16,63,0.01943040043115616
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,1,128,1,float16,fp8,63,0.02033119946718216
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,1,128,1,float16,float16,127,0.019310399889945984
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,1,128,1,float16,fp8,127,0.020080000162124634
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,1,128,1,float16,float16,255,0.020795199275016784
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,1,128,1,float16,fp8,255,0.022276799380779266
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,1,128,1,float16,float16,511,0.024027200043201448
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,1,128,1,float16,fp8,511,0.025916799902915955
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,1,128,1,float16,float16,1023,0.024129599332809448
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,1,128,1,float16,fp8,1023,0.025561600923538208
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,1,128,1,float16,float16,2047,0.024959999322891235
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,1,128,1,float16,fp8,2047,0.025670400261878966
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,1,128,1,float16,float16,4095,0.026947200298309326
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,1,128,1,float16,fp8,4095,0.02675360143184662
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,2,128,1,float16,fp8,1,0.020099200308322906
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,2,128,1,float16,float16,1,0.019431999325752257
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,2,128,1,float16,fp8,15,0.02017440050840378
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,2,128,1,float16,float16,3,0.01934719979763031
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,2,128,1,float16,fp8,3,0.020022399723529816
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,2,128,1,float16,float16,7,0.01964319944381714
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,2,128,1,float16,fp8,7,0.019852800667285918
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,2,128,1,float16,float16,15,0.019467200338840484
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,2,128,1,float16,float16,31,0.019414399564266206
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,2,128,1,float16,fp8,31,0.02019519954919815
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,2,128,1,float16,float16,63,0.019368000328540802
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,2,128,1,float16,fp8,63,0.020084799826145174
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,2,128,1,float16,float16,127,0.019280000030994414
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,2,128,1,float16,fp8,127,0.020080000162124634
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,2,128,1,float16,float16,255,0.020948800444602966
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,2,128,1,float16,fp8,255,0.022147199511528014
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,2,128,1,float16,float16,511,0.0240447998046875
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,2,128,1,float16,fp8,511,0.02635999917984009
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,2,128,1,float16,float16,1023,0.024166400730609893
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,2,128,1,float16,fp8,1023,0.024991999566555022
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,2,128,1,float16,float16,2047,0.025860801339149475
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,2,128,1,float16,fp8,2047,0.026214399933815004
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,2,128,1,float16,float16,4095,0.029516801238059998
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,2,128,1,float16,fp8,4095,0.029708799719810487
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,4,128,1,float16,float16,1,0.019659200310707094
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,4,128,1,float16,fp8,1,0.020483200252056123
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,4,128,1,float16,float16,3,0.019486400485038757
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,4,128,1,float16,fp8,3,0.020231999456882477
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,4,128,1,float16,float16,7,0.019659200310707094
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,4,128,1,float16,fp8,7,0.0204352006316185
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,4,128,1,float16,float16,15,0.019508799910545348
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,4,128,1,float16,fp8,15,0.020556800067424774
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,4,128,1,float16,float16,31,0.019782400131225585
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,4,128,1,float16,fp8,31,0.020300799608230592
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,4,128,1,float16,float16,63,0.019622400403022766
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,4,128,1,float16,fp8,63,0.02027360051870346
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,4,128,1,float16,float16,127,0.019529600441455842
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,4,128,1,float16,fp8,127,0.020401600003242492
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,4,128,1,float16,float16,255,0.021539199352264404
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,4,128,1,float16,fp8,255,0.022361600399017335
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,4,128,1,float16,float16,511,0.024452799558639528
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,4,128,1,float16,fp8,511,0.026075199246406555
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,4,128,1,float16,float16,1023,0.025595200061798096
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,4,128,1,float16,fp8,1023,0.02585279941558838
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,4,128,1,float16,float16,2047,0.027438399195671082
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,4,128,1,float16,fp8,2047,0.029790401458740234
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,4,128,1,float16,float16,4095,0.04084640145301819
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,4,128,1,float16,fp8,4095,0.035924801230430604
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,8,128,1,float16,float16,1,0.01995519995689392
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,8,128,1,float16,fp8,1,0.020667199790477753
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,8,128,1,float16,float16,3,0.019897599518299103
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,8,128,1,float16,fp8,3,0.020452800393104553
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,8,128,1,float16,float16,7,0.019547200202941893
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,8,128,1,float16,fp8,7,0.02067680060863495
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,8,128,1,float16,float16,127,0.020076799392700195
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,8,128,1,float16,float16,15,0.0199535995721817
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,8,128,1,float16,fp8,15,0.020635199546813966
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,8,128,1,float16,float16,31,0.020054399967193604
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,8,128,1,float16,fp8,31,0.020796799659729005
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,8,128,1,float16,float16,63,0.01979839950799942
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,8,128,1,float16,fp8,63,0.020652799308300017
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,8,128,1,float16,fp8,127,0.02083040028810501
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,8,128,1,float16,float16,255,0.02176959961652756
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,8,128,1,float16,fp8,255,0.022755199670791627
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,8,128,1,float16,float16,511,0.024915200471878052
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,8,128,1,float16,fp8,511,0.026345598697662353
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,8,128,1,float16,float16,1023,0.027823999524116516
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,8,128,1,float16,fp8,1023,0.02954080104827881
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,8,128,1,float16,float16,2047,0.040862399339675906
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,8,128,1,float16,fp8,2047,0.035504001379013064
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,8,128,1,float16,float16,4095,0.05663679838180542
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,64,8,128,1,float16,fp8,4095,0.05481119751930237
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,1,128,1,float16,float16,1,0.06761760115623475
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,1,128,1,float16,fp8,1,0.07287039756774902
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,1,128,1,float16,float16,3,0.06713759899139404
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,1,128,1,float16,fp8,3,0.07295519709587098
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,1,128,1,float16,fp8,31,0.07359359860420227
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,1,128,1,float16,float16,7,0.06698240041732788
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,1,128,1,float16,fp8,63,0.07863039970397949
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,1,128,1,float16,fp8,7,0.073471999168396
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,1,128,1,float16,fp8,127,0.08207359910011292
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,1,128,1,float16,float16,15,0.06839519739151001
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,1,128,1,float16,fp8,15,0.07309120297431945
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,1,128,1,float16,float16,31,0.07111679911613464
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,1,128,1,float16,float16,63,0.07566720247268677
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,1,128,1,float16,float16,127,0.07986879944801331
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,2,128,1,float16,float16,1,0.09628159999847412
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,2,128,1,float16,fp8,1,0.10793119668960571
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,2,128,1,float16,float16,3,0.09512159824371338
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,2,128,1,float16,fp8,3,0.10697760581970214
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,2,128,1,float16,float16,7,0.09659519791603088
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,2,128,1,float16,fp8,7,0.1075935959815979
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,2,128,1,float16,float16,15,0.09865760207176208
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,2,128,1,float16,fp8,15,0.107532799243927
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,2,128,1,float16,float16,31,0.10309280157089233
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,2,128,1,float16,fp8,31,0.11072319746017456
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,2,128,1,float16,float16,63,0.10480959415435791
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,2,128,1,float16,fp8,63,0.11747679710388184
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,2,128,1,float16,float16,127,0.10963679552078247
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,2,128,1,float16,fp8,127,0.11970399618148804
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,4,128,1,float16,float16,1,0.1531167984008789
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,4,128,1,float16,float16,15,0.15948159694671632
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,4,128,1,float16,fp8,1,0.17605119943618774
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,4,128,1,float16,float16,3,0.15345439910888672
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,4,128,1,float16,fp8,3,0.17735999822616577
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,4,128,1,float16,float16,7,0.15416959524154664
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,4,128,1,float16,fp8,7,0.1769215941429138
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,4,128,1,float16,fp8,15,0.17809280157089233
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,4,128,1,float16,float16,31,0.16218240261077882
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,4,128,1,float16,fp8,31,0.18542399406433105
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,4,128,1,float16,float16,63,0.16366080045700074
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,4,128,1,float16,fp8,63,0.1904911994934082
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,4,128,1,float16,float16,127,0.16606719493865968
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,4,128,1,float16,fp8,127,0.19520959854125977
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,8,128,1,float16,float16,1,0.2724064111709595
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,8,128,1,float16,fp8,1,0.31362080574035645
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,8,128,1,float16,float16,3,0.2717056035995483
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,8,128,1,float16,fp8,3,0.3138751983642578
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,8,128,1,float16,float16,7,0.2777199983596802
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,8,128,1,float16,fp8,7,0.31309919357299804
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,8,128,1,float16,float16,15,0.2814368009567261
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,8,128,1,float16,fp8,15,0.32574560642242434
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,8,128,1,float16,float16,31,0.28418400287628176
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,8,128,1,float16,fp8,31,0.33786559104919434
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,8,128,1,float16,float16,63,0.28743040561676025
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,8,128,1,float16,fp8,63,0.3410288095474243
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,8,128,1,float16,float16,127,0.2927680015563965
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,1,128,1,float16,float16,1,0.11823040246963501
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,64,8,128,1,float16,fp8,127,0.35173759460449217
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,1,128,1,float16,fp8,1,0.1307711958885193
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,1,128,1,float16,float16,3,0.11789599657058716
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,1,128,1,float16,fp8,3,0.13064639568328856
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,1,128,1,float16,float16,7,0.11877599954605103
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,1,128,1,float16,fp8,7,0.1299072027206421
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,1,128,1,float16,float16,15,0.11871839761734009
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,1,128,1,float16,fp8,15,0.1308351993560791
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,1,128,1,float16,float16,31,0.12020640373229981
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,1,128,1,float16,fp8,31,0.13156800270080565
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,1,128,1,float16,float16,63,0.1227679967880249
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,1,128,1,float16,fp8,63,0.13358720541000366
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,2,128,1,float16,float16,1,0.17279520034790039
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,2,128,1,float16,fp8,1,0.19794559478759766
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,2,128,1,float16,float16,3,0.17318880558013916
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,2,128,1,float16,fp8,3,0.198689603805542
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,2,128,1,float16,float16,7,0.17359999418258668
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,2,128,1,float16,fp8,7,0.19849439859390258
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,2,128,1,float16,float16,15,0.1740272045135498
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,2,128,1,float16,fp8,15,0.20015840530395507
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,2,128,1,float16,float16,31,0.17509440183639527
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,2,128,1,float16,fp8,31,0.2001807928085327
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,2,128,1,float16,float16,63,0.17840479612350463
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,2,128,1,float16,fp8,63,0.20424160957336426
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,4,128,1,float16,float16,1,0.29090878963470457
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,4,128,1,float16,fp8,1,0.3419872045516968
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,4,128,1,float16,float16,3,0.29098238945007326
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,4,128,1,float16,fp8,3,0.3439136028289795
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,4,128,1,float16,float16,7,0.29224319458007814
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,4,128,1,float16,fp8,7,0.34314560890197754
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,4,128,1,float16,float16,15,0.29289119243621825
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,4,128,1,float16,fp8,15,0.3461983919143677
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,4,128,1,float16,float16,31,0.29580159187316896
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,4,128,1,float16,fp8,31,0.34691519737243653
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,4,128,1,float16,float16,63,0.2978287935256958
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,4,128,1,float16,fp8,63,0.3529839992523193
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,8,128,1,float16,float16,1,0.530291223526001
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,8,128,1,float16,fp8,1,0.6365071773529053
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,8,128,1,float16,float16,3,0.5299456119537354
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,8,128,1,float16,fp8,3,0.6383584022521973
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,8,128,1,float16,float16,7,0.5305103778839111
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,8,128,1,float16,fp8,7,0.6390048027038574
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,8,128,1,float16,float16,15,0.5306064128875733
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,8,128,1,float16,fp8,15,0.6403872013092041
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,8,128,1,float16,float16,31,0.5338704109191894
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,8,128,1,float16,fp8,31,0.6335264205932617
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,1,128,1,float16,float16,1,0.01932159960269928
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,1,128,1,float16,fp8,1,0.020236800611019134
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,8,128,1,float16,float16,63,0.5394415855407715
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,1,128,1,float16,float16,3,0.019556799530982973
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,64,8,128,1,float16,fp8,63,0.6403151988983155
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,1,128,1,float16,fp8,3,0.020241600275039674
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,1,128,1,float16,float16,7,0.019439999759197236
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,1,128,1,float16,fp8,7,0.020160000026226043
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,1,128,1,float16,float16,15,0.019551999866962433
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,1,128,1,float16,fp8,15,0.02005600035190582
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,1,128,1,float16,float16,31,0.019900800287723543
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,1,128,1,float16,fp8,31,0.020259200036525725
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,1,128,1,float16,float16,63,0.01924159973859787
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,1,128,1,float16,fp8,63,0.020025600492954255
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,1,128,1,float16,float16,127,0.019385600090026857
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,1,128,1,float16,fp8,127,0.02027360051870346
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,1,128,1,float16,float16,255,0.02113119959831238
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,1,128,1,float16,fp8,255,0.022353599965572356
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,1,128,1,float16,float16,511,0.024089600145816802
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,1,128,1,float16,fp8,511,0.025972801446914672
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,1,128,1,float16,float16,1023,0.024617600440979003
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,1,128,1,float16,fp8,1023,0.025697600841522217
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,1,128,1,float16,float16,2047,0.02671839892864227
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,1,128,1,float16,fp8,2047,0.02664799988269806
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,1,128,1,float16,float16,4095,0.02996639907360077
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,1,128,1,float16,fp8,4095,0.029905599355697633
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,2,128,1,float16,float16,1,0.019617600739002226
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,2,128,1,float16,fp8,1,0.02050720006227493
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,2,128,1,float16,float16,3,0.019840000569820403
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,2,128,1,float16,fp8,3,0.02052319943904877
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,2,128,1,float16,float16,7,0.019651199877262115
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,2,128,1,float16,fp8,7,0.020295999944210052
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,2,128,1,float16,float16,15,0.01966080069541931
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,2,128,1,float16,fp8,15,0.02024960070848465
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,2,128,1,float16,float16,31,0.01958079934120178
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,2,128,1,float16,fp8,31,0.02065120041370392
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,2,128,1,float16,float16,63,0.019516800343990327
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,2,128,1,float16,fp8,63,0.020737600326538087
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,2,128,1,float16,float16,127,0.01977279931306839
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,2,128,1,float16,fp8,127,0.02054239958524704
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,2,128,1,float16,float16,255,0.02141280025243759
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,2,128,1,float16,fp8,255,0.02264000028371811
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,2,128,1,float16,float16,511,0.024851199984550477
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,2,128,1,float16,fp8,511,0.02611039876937866
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,2,128,1,float16,float16,1023,0.02602880001068115
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,2,128,1,float16,fp8,1023,0.02624320089817047
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,2,128,1,float16,float16,2047,0.02791520059108734
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,2,128,1,float16,fp8,2047,0.029688000679016113
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,2,128,1,float16,float16,4095,0.04136480093002319
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,2,128,1,float16,fp8,4095,0.03568640053272247
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,4,128,1,float16,float16,1,0.019916799664497376
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,4,128,1,float16,fp8,1,0.020713600516319274
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,4,128,1,float16,float16,3,0.01979680061340332
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,4,128,1,float16,fp8,3,0.02091040015220642
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,4,128,1,float16,float16,7,0.020052799582481386
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,4,128,1,float16,fp8,7,0.02051360011100769
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,4,128,1,float16,float16,15,0.019942399859428406
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,4,128,1,float16,fp8,15,0.020897600054740905
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,4,128,1,float16,float16,31,0.019827200472354888
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,4,128,1,float16,fp8,31,0.02056960016489029
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,4,128,1,float16,float16,63,0.019947199523448943
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,4,128,1,float16,fp8,63,0.020656000077724456
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,4,128,1,float16,float16,127,0.01990559995174408
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,4,128,1,float16,fp8,127,0.02101760059595108
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,4,128,1,float16,float16,255,0.021747200191020964
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,4,128,1,float16,fp8,255,0.022884799540042876
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,4,128,1,float16,float16,511,0.02466080039739609
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,4,128,1,float16,fp8,511,0.02655999958515167
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,4,128,1,float16,float16,1023,0.02802560031414032
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,4,128,1,float16,fp8,1023,0.02991360127925873
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,4,128,1,float16,float16,2047,0.04128159880638123
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,4,128,1,float16,fp8,2047,0.03686720132827759
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,4,128,1,float16,float16,4095,0.05751199722290039
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,4,128,1,float16,fp8,4095,0.05492479801177978
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,8,128,1,float16,float16,1,0.02022400051355362
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,8,128,1,float16,fp8,1,0.02091200053691864
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,8,128,1,float16,float16,3,0.02008160054683685
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,8,128,1,float16,fp8,3,0.021167999505996703
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,8,128,1,float16,float16,7,0.02043360024690628
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,8,128,1,float16,fp8,7,0.02110079973936081
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,8,128,1,float16,float16,15,0.020446400344371795
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,8,128,1,float16,fp8,15,0.02126079946756363
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,8,128,1,float16,float16,31,0.020105600357055664
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,8,128,1,float16,fp8,31,0.021135999262332915
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,8,128,1,float16,float16,63,0.020292800664901734
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,8,128,1,float16,fp8,63,0.021003200113773345
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,8,128,1,float16,fp8,511,0.0272816002368927
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,8,128,1,float16,float16,127,0.020283199846744537
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,8,128,1,float16,fp8,127,0.021198399364948273
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,8,128,1,float16,float16,255,0.022259199619293214
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,8,128,1,float16,fp8,255,0.023422400653362273
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,8,128,1,float16,float16,511,0.02518559992313385
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,8,128,1,float16,float16,1023,0.04007039964199066
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,8,128,1,float16,fp8,1023,0.03490079939365387
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,8,128,1,float16,float16,2047,0.05588799715042114
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,8,128,1,float16,fp8,2047,0.054232001304626465
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,8,128,1,float16,float16,4095,0.08838880062103271
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,64,8,128,1,float16,fp8,4095,0.08001919984817504
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,1,128,1,float16,float16,1,0.020955200493335723
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,1,128,1,float16,fp8,1,0.022385600209236144
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,1,128,1,float16,float16,3,0.02154559940099716
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,1,128,1,float16,fp8,3,0.022302399575710296
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,1,128,1,float16,float16,7,0.020951999723911284
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,1,128,1,float16,fp8,7,0.021931199729442595
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,1,128,1,float16,float16,15,0.02130880057811737
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,1,128,1,float16,fp8,15,0.02178719937801361
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,1,128,1,float16,float16,31,0.020827199518680572
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,1,128,1,float16,fp8,31,0.021768000721931458
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,1,128,1,float16,float16,63,0.021561600267887115
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,1,128,1,float16,fp8,63,0.02213120013475418
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,1,128,1,float16,float16,127,0.021401600539684297
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,1,128,1,float16,fp8,127,0.022174400091171265
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,1,128,1,float16,float16,255,0.022932800650596618
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,1,128,1,float16,fp8,255,0.02399359941482544
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,1,128,1,float16,float16,511,0.025859200954437257
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,1,128,1,float16,float16,4095,0.059406399726867676
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,1,128,1,float16,fp8,511,0.027739199995994567
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,1,128,1,float16,float16,1023,0.02980799973011017
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,1,128,1,float16,fp8,8191,0.08047680258750915
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,1,128,1,float16,fp8,1023,0.031198400259017944
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,1,128,1,float16,float16,2047,0.04323039948940277
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,1,128,1,float16,fp8,2047,0.03861759901046753
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,1,128,1,float16,fp8,4095,0.05568479895591736
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,1,128,1,float16,float16,8191,0.09178239703178406
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,2,128,1,float16,float16,1,0.0215488001704216
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,2,128,1,float16,fp8,1,0.02247200012207031
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,2,128,1,float16,float16,3,0.021353599429130555
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,2,128,1,float16,fp8,3,0.022433599829673766
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,2,128,1,float16,float16,7,0.021660800278186797
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,2,128,1,float16,fp8,7,0.022339199483394623
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,2,128,1,float16,float16,15,0.021331200003623964
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,2,128,1,float16,fp8,15,0.022233599424362184
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,2,128,1,float16,float16,31,0.021367999911308288
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,2,128,1,float16,fp8,31,0.02216159999370575
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,2,128,1,float16,float16,63,0.02194560021162033
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,2,128,1,float16,fp8,63,0.02221599966287613
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,2,128,1,float16,float16,127,0.02181120067834854
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,2,128,1,float16,fp8,127,0.022308799624443054
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,2,128,1,float16,float16,1023,0.041540798544883725
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,2,128,1,float16,float16,255,0.02316800057888031
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,2,128,1,float16,fp8,255,0.02473759949207306
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,2,128,1,float16,float16,511,0.02645280063152313
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,2,128,1,float16,fp8,511,0.028110399842262268
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,2,128,1,float16,fp8,1023,0.036820799112319946
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,2,128,1,float16,float16,2047,0.05781440138816833
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,2,128,1,float16,fp8,2047,0.05432479977607727
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,2,128,1,float16,float16,4095,0.08982719779014588
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,2,128,1,float16,fp8,4095,0.07934560179710388
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,2,128,1,float16,float16,8191,0.15264639854431153
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,2,128,1,float16,fp8,8191,0.1312816023826599
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,4,128,1,float16,float16,1,0.025644800066947936
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,4,128,1,float16,fp8,1,0.027033600211143493
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,4,128,1,float16,float16,3,0.025174400210380553
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,4,128,1,float16,fp8,3,0.026876801252365114
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,4,128,1,float16,float16,7,0.025252801179885865
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,4,128,1,float16,float16,15,0.025228801369667053
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,4,128,1,float16,fp8,7,0.02678079903125763
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,4,128,1,float16,fp8,15,0.026876801252365114
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,4,128,1,float16,float16,31,0.025275200605392456
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,4,128,1,float16,fp8,31,0.026894399523735048
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,4,128,1,float16,float16,63,0.025198400020599365
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,4,128,1,float16,fp8,63,0.027054399251937866
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,4,128,1,float16,float16,127,0.025273600220680238
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,4,128,1,float16,fp8,127,0.02672480046749115
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,4,128,1,float16,float16,255,0.028998398780822755
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,4,128,1,float16,fp8,255,0.031139200925827025
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,4,128,1,float16,float16,511,0.04207200109958649
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,4,128,1,float16,fp8,511,0.03859040141105652
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,4,128,1,float16,float16,1023,0.060108798742294314
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,4,128,1,float16,fp8,1023,0.05964319705963135
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,4,128,1,float16,float16,2047,0.09364799857139587
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,4,128,1,float16,fp8,2047,0.0837664008140564
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,4,128,1,float16,float16,4095,0.15590399503707886
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,4,128,1,float16,fp8,4095,0.1354688048362732
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,8,128,1,float16,float16,1,0.03265919983386993
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,4,128,1,float16,float16,8191,0.276198410987854
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,4,128,1,float16,fp8,8191,0.23588640689849855
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,8,128,1,float16,fp8,1,0.035820800065994265
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,8,128,1,float16,float16,3,0.03310079872608185
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,8,128,1,float16,fp8,3,0.03617599904537201
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,8,128,1,float16,float16,7,0.03279680013656616
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,8,128,1,float16,fp8,7,0.03604319989681244
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,8,128,1,float16,float16,15,0.03282560110092163
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,8,128,1,float16,fp8,15,0.07517600059509277
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,8,128,1,float16,float16,31,0.033108800649642944
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,8,128,1,float16,fp8,31,0.036160001158714296
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,8,128,1,float16,float16,63,0.03313280045986176
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,8,128,1,float16,fp8,63,0.0361519992351532
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,8,128,1,float16,float16,127,0.03344480097293854
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,8,128,1,float16,fp8,127,0.03586080074310303
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,8,128,1,float16,float16,255,0.04809280037879944
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,8,128,1,float16,fp8,255,0.04463039934635162
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,8,128,1,float16,float16,511,0.06111999750137329
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,8,128,1,float16,fp8,511,0.06500160098075866
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,8,128,1,float16,float16,1023,0.09688959717750549
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,8,128,1,float16,fp8,1023,0.09267039895057679
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,8,128,1,float16,float16,2047,0.15799520015716553
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,8,128,1,float16,fp8,2047,0.1404368042945862
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,8,128,1,float16,float16,4095,0.279753589630127
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,8,128,1,float16,fp8,4095,0.2423408031463623
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,1,128,1,float16,float16,1,0.016540800034999848
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,1,128,1,float16,fp8,1,0.01746080070734024
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,8,128,1,float16,float16,8191,0.5285583972930908
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,1,128,1,float16,float16,3,0.01655520051717758
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,32,8,128,1,float16,fp8,8191,0.44012160301208497
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,1,128,1,float16,fp8,3,0.017241600155830383
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,1,128,1,float16,float16,7,0.0164000004529953
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,1,128,1,float16,fp8,7,0.01722240000963211
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,1,128,1,float16,float16,15,0.016396799683570863
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,1,128,1,float16,fp8,15,0.01736160069704056
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,1,128,1,float16,float16,31,0.016654400527477263
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,1,128,1,float16,fp8,31,0.017484800517559053
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,1,128,1,float16,float16,63,0.016288000345230102
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,1,128,1,float16,fp8,63,0.017375999689102174
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,1,128,1,float16,float16,127,0.016359999775886536
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,1,128,1,float16,fp8,127,0.017239999771118165
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,1,128,1,float16,float16,255,0.017888000607490538
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,1,128,1,float16,fp8,255,0.019124799966812135
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,1,128,1,float16,float16,511,0.020956799387931824
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,1,128,1,float16,fp8,511,0.023299199342727662
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,1,128,1,float16,float16,1023,0.020528000593185425
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,1,128,1,float16,fp8,1023,0.021833600103855134
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,1,128,1,float16,float16,2047,0.020793600380420683
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,1,128,1,float16,fp8,2047,0.022017599642276765
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,1,128,1,float16,float16,4095,0.021512000262737273
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,1,128,1,float16,fp8,4095,0.022649599611759184
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,1,128,1,float16,float16,8191,0.02258560061454773
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,1,128,1,float16,fp8,8191,0.023768000304698944
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,2,128,1,float16,float16,1,0.016916799545288085
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,2,128,1,float16,fp8,1,0.017902399599552154
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,2,128,1,float16,float16,3,0.017046399414539337
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,2,128,1,float16,fp8,3,0.017884799838066102
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,2,128,1,float16,float16,7,0.01716320067644119
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,2,128,1,float16,fp8,7,0.018049600720405578
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,2,128,1,float16,float16,15,0.01730400025844574
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,2,128,1,float16,fp8,15,0.017795200645923614
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,2,128,1,float16,float16,31,0.017364799976348877
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,2,128,1,float16,fp8,31,0.01786399930715561
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,2,128,1,float16,float16,63,0.017025600373744964
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,2,128,1,float16,fp8,63,0.018143999576568603
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,2,128,1,float16,float16,127,0.017228800058364867
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,2,128,1,float16,fp8,127,0.018080000579357148
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,2,128,1,float16,float16,255,0.018619200587272643
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,2,128,1,float16,fp8,255,0.02014559954404831
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,2,128,1,float16,float16,511,0.021990400552749634
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,2,128,1,float16,fp8,511,0.02382880002260208
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,2,128,1,float16,float16,1023,0.021347199380397797
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,2,128,1,float16,fp8,1023,0.022625599801540375
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,2,128,1,float16,float16,2047,0.021646399796009064
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,2,128,1,float16,fp8,2047,0.02311840057373047
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,2,128,1,float16,float16,4095,0.022588799893856048
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,2,128,1,float16,fp8,4095,0.023534399271011353
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,2,128,1,float16,float16,8191,0.02398560047149658
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,2,128,1,float16,fp8,8191,0.024982400238513947
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,4,128,1,float16,float16,1,0.018993599712848662
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,4,128,1,float16,fp8,1,0.01937119960784912
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,4,128,1,float16,float16,3,0.01866399943828583
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,4,128,1,float16,fp8,3,0.01969760060310364
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,4,128,1,float16,float16,7,0.018537600338459016
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,4,128,1,float16,fp8,7,0.019731199741363524
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,4,128,1,float16,float16,127,0.01868479996919632
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,4,128,1,float16,float16,15,0.018907199800014495
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,4,128,1,float16,fp8,15,0.019791999459266664
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,4,128,1,float16,float16,31,0.018939200043678283
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,4,128,1,float16,fp8,31,0.019657599925994872
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,4,128,1,float16,float16,63,0.018963199853897095
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,4,128,1,float16,fp8,63,0.019675199687480927
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,4,128,1,float16,fp8,127,0.01980320066213608
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,4,128,1,float16,float16,255,0.020612800121307374
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,4,128,1,float16,fp8,255,0.021166400611400606
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,4,128,1,float16,float16,511,0.02327519953250885
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,4,128,1,float16,fp8,511,0.025310400128364562
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,4,128,1,float16,float16,1023,0.023054400086402894
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,4,128,1,float16,fp8,1023,0.024459199607372285
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,4,128,1,float16,float16,2047,0.023180800676345825
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,4,128,1,float16,fp8,2047,0.02455520033836365
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,4,128,1,float16,float16,4095,0.023982399702072145
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,4,128,1,float16,fp8,4095,0.0254256010055542
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,4,128,1,float16,float16,8191,0.02646079957485199
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,4,128,1,float16,fp8,8191,0.026980799436569215
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,8,128,1,float16,float16,1,0.018827199935913086
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,8,128,1,float16,fp8,1,0.019654400646686554
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,8,128,1,float16,float16,3,0.019083200395107268
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,8,128,1,float16,fp8,3,0.01969120055437088
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,8,128,1,float16,float16,7,0.018768000602722167
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,8,128,1,float16,fp8,7,0.019592000544071196
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,8,128,1,float16,float16,15,0.019032000005245207
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,8,128,1,float16,fp8,15,0.019753600656986236
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,8,128,1,float16,float16,31,0.018848000466823576
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,8,128,1,float16,fp8,31,0.019755199551582336
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,8,128,1,float16,float16,63,0.018937599658966065
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,8,128,1,float16,fp8,63,0.01982239931821823
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,8,128,1,float16,float16,127,0.018980799615383147
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,8,128,1,float16,fp8,127,0.019443200528621675
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,8,128,1,float16,float16,255,0.020422400534152986
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,8,128,1,float16,fp8,255,0.02150239944458008
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,8,128,1,float16,float16,511,0.023652799427509308
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,8,128,1,float16,fp8,511,0.025382399559020996
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,8,128,1,float16,float16,1023,0.023131200671195985
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,8,128,1,float16,fp8,1023,0.024531200528144836
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,8,128,1,float16,float16,2047,0.02396000027656555
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,8,128,1,float16,fp8,2047,0.0251008003950119
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,8,128,1,float16,float16,4095,0.025920000672340394
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,8,128,1,float16,fp8,4095,0.02619520127773285
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,1,128,1,float16,fp8,3,0.018062399327754976
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,8,128,1,float16,float16,8191,0.029190400242805482
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,32,8,128,1,float16,fp8,8191,0.02990399897098541
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,1,128,1,float16,float16,1,0.0173007994890213
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,1,128,1,float16,fp8,1,0.017960000038146972
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,1,128,1,float16,float16,3,0.017310400307178498
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,1,128,1,float16,float16,7,0.01735839992761612
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,1,128,1,float16,fp8,7,0.018105599284172057
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,1,128,1,float16,float16,15,0.01727840006351471
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,1,128,1,float16,fp8,15,0.017990399897098542
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,1,128,1,float16,float16,31,0.017334400117397307
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,1,128,1,float16,fp8,31,0.01814880073070526
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,1,128,1,float16,float16,63,0.01720480024814606
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,1,128,1,float16,fp8,63,0.01809920072555542
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,1,128,1,float16,float16,127,0.017393599450588226
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,1,128,1,float16,fp8,127,0.01833759993314743
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,1,128,1,float16,float16,255,0.018863999843597413
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,1,128,1,float16,fp8,255,0.020110400021076204
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,1,128,1,float16,float16,511,0.021934400498867034
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,1,128,1,float16,fp8,511,0.024073599278926848
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,1,128,1,float16,float16,1023,0.02128479927778244
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,1,128,1,float16,fp8,1023,0.02290239930152893
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,1,128,1,float16,float16,2047,0.02178879976272583
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,1,128,1,float16,fp8,2047,0.02303680032491684
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,1,128,1,float16,float16,4095,0.022543999552726745
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,1,128,1,float16,fp8,4095,0.023721599578857423
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,1,128,1,float16,float16,8191,0.02414399981498718
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,1,128,1,float16,fp8,8191,0.025047999620437623
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,2,128,1,float16,float16,1,0.018915200233459474
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,2,128,1,float16,fp8,1,0.01982560008764267
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,2,128,1,float16,float16,3,0.0190528005361557
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,2,128,1,float16,fp8,3,0.019891199469566346
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,2,128,1,float16,float16,7,0.0188400000333786
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,2,128,1,float16,fp8,7,0.01980320066213608
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,2,128,1,float16,float16,15,0.018926399946212768
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,2,128,1,float16,fp8,15,0.01976960003376007
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,2,128,1,float16,float16,31,0.01905120015144348
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,2,128,1,float16,fp8,31,0.020080000162124634
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,2,128,1,float16,float16,63,0.018748800456523895
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,2,128,1,float16,fp8,63,0.019849599897861482
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,2,128,1,float16,float16,127,0.019014400243759156
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,2,128,1,float16,fp8,127,0.01971520036458969
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,2,128,1,float16,float16,255,0.020572799444198608
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,2,128,1,float16,fp8,255,0.021876800060272216
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,2,128,1,float16,float16,511,0.023705600202083586
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,2,128,1,float16,fp8,511,0.025518399477005006
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,2,128,1,float16,float16,1023,0.023319999873638152
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,2,128,1,float16,fp8,1023,0.02483679950237274
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,2,128,1,float16,float16,2047,0.02348320037126541
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,2,128,1,float16,fp8,2047,0.025121599435806274
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,2,128,1,float16,float16,4095,0.02470880001783371
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,2,128,1,float16,fp8,4095,0.02571359872817993
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,2,128,1,float16,float16,8191,0.02720640003681183
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,2,128,1,float16,fp8,8191,0.02710399925708771
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,4,128,1,float16,float16,1,0.018985599279403687
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,4,128,1,float16,fp8,1,0.01964160054922104
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,4,128,1,float16,float16,3,0.019020800292491914
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,4,128,1,float16,fp8,3,0.020032000541687012
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,4,128,1,float16,float16,7,0.019120000302791595
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,4,128,1,float16,fp8,7,0.019844800233840942
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,4,128,1,float16,float16,15,0.019182400405406953
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,4,128,1,float16,fp8,15,0.02014400064945221
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,4,128,1,float16,float16,31,0.018940800428390504
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,4,128,1,float16,fp8,31,0.0197952002286911
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,4,128,1,float16,float16,63,0.018910400569438934
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,4,128,1,float16,fp8,63,0.02018879950046539
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,4,128,1,float16,float16,127,0.018836799263954162
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,4,128,1,float16,fp8,127,0.01980479955673218
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,4,128,1,float16,fp8,1023,0.024846400320529937
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,4,128,1,float16,float16,255,0.020606400072574617
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,4,128,1,float16,fp8,255,0.022191999852657317
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,4,128,1,float16,float16,511,0.023713600635528565
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,4,128,1,float16,fp8,4095,0.02635200023651123
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,4,128,1,float16,fp8,511,0.025252801179885865
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,4,128,1,float16,float16,1023,0.023134399950504304
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,4,128,1,float16,float16,2047,0.023966400325298308
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,4,128,1,float16,fp8,2047,0.025035199522972108
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,4,128,1,float16,float16,4095,0.025700798630714415
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,4,128,1,float16,float16,8191,0.029553601145744325
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,4,128,1,float16,fp8,8191,0.030035200715065002
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,8,128,1,float16,float16,1,0.01916320025920868
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,8,128,1,float16,fp8,1,0.020244799554347992
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,8,128,1,float16,float16,3,0.01913599967956543
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,8,128,1,float16,float16,7,0.01918559968471527
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,8,128,1,float16,fp8,3,0.02003999948501587
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,8,128,1,float16,fp8,7,0.019832000136375427
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,8,128,1,float16,float16,15,0.01929440051317215
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,8,128,1,float16,float16,127,0.019201600551605226
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,8,128,1,float16,fp8,15,0.020084799826145174
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,8,128,1,float16,float16,31,0.01905120015144348
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,8,128,1,float16,fp8,31,0.020024000108242034
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,8,128,1,float16,float16,63,0.019124799966812135
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,8,128,1,float16,fp8,63,0.019734400510787963
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,8,128,1,float16,fp8,127,0.020049600303173064
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,8,128,1,float16,float16,255,0.021057599782943727
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,8,128,1,float16,fp8,255,0.021907199919223786
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,8,128,1,float16,float16,511,0.02388480007648468
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,8,128,1,float16,fp8,511,0.025654399394989015
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,8,128,1,float16,float16,1023,0.023473599553108217
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,8,128,1,float16,fp8,1023,0.024846400320529937
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,8,128,1,float16,float16,2047,0.025444799661636354
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,8,128,1,float16,fp8,2047,0.026075199246406555
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,8,128,1,float16,float16,4095,0.02938719987869263
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,8,128,1,float16,fp8,4095,0.02956799864768982
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,8,128,1,float16,float16,8191,0.03992800116539001
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,32,8,128,1,float16,fp8,8191,0.03573760092258453
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,1,128,1,float16,float16,1,0.02009280025959015
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,1,128,1,float16,fp8,1,0.020839999616146087
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,1,128,1,float16,float16,3,0.01993280053138733
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,1,128,1,float16,fp8,3,0.020916800200939178
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,1,128,1,float16,float16,7,0.02014880031347275
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,1,128,1,float16,fp8,7,0.02094399929046631
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,1,128,1,float16,float16,15,0.02008800059556961
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,1,128,1,float16,fp8,15,0.02075359970331192
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,1,128,1,float16,float16,31,0.020027199387550355
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,1,128,1,float16,fp8,31,0.02067520022392273
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,1,128,1,float16,fp8,255,0.022779199481010436
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,1,128,1,float16,float16,63,0.020100800693035124
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,1,128,1,float16,fp8,63,0.020945599675178526
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,1,128,1,float16,float16,127,0.020347200334072113
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,1,128,1,float16,fp8,127,0.020979200303554536
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,1,128,1,float16,float16,255,0.02195200026035309
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,1,128,1,float16,float16,511,0.024758400022983552
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,1,128,1,float16,fp8,511,0.026601600646972656
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,1,128,1,float16,float16,1023,0.026476800441741943
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,1,128,1,float16,fp8,1023,0.02643519937992096
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,1,128,1,float16,float16,2047,0.028459200263023378
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,1,128,1,float16,fp8,2047,0.030076798796653748
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,1,128,1,float16,float16,4095,0.04190559983253479
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,1,128,1,float16,fp8,4095,0.03633280098438263
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,1,128,1,float16,float16,8191,0.05794559717178345
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,1,128,1,float16,fp8,8191,0.05435839891433716
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,2,128,1,float16,float16,1,0.020105600357055664
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,2,128,1,float16,fp8,1,0.02099040001630783
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,2,128,1,float16,float16,3,0.020161600410938264
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,2,128,1,float16,fp8,3,0.021185599267482758
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,2,128,1,float16,float16,7,0.020417599380016326
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,2,128,1,float16,fp8,7,0.021057599782943727
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,2,128,1,float16,float16,15,0.020283199846744537
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,2,128,1,float16,fp8,15,0.021036800742149354
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,2,128,1,float16,float16,31,0.020644800364971162
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,2,128,1,float16,fp8,31,0.021264000236988066
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,2,128,1,float16,float16,63,0.02034880071878433
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,2,128,1,float16,fp8,63,0.021433599293231964
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,2,128,1,float16,float16,127,0.020494399964809416
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,2,128,1,float16,fp8,127,0.021247999370098115
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,2,128,1,float16,float16,255,0.02208320051431656
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,2,128,1,float16,fp8,255,0.023227199912071228
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,2,128,1,float16,float16,511,0.025516799092292784
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,2,128,1,float16,fp8,511,0.02696160078048706
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,2,128,1,float16,float16,1023,0.0297760009765625
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,2,128,1,float16,fp8,1023,0.029993599653244017
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,2,128,1,float16,float16,2047,0.04211840033531189
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,2,128,1,float16,fp8,8191,0.08000159859657288
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,2,128,1,float16,fp8,2047,0.03616159856319427
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,2,128,1,float16,float16,4095,0.05795999765396118
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,2,128,1,float16,fp8,4095,0.05504639744758606
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,2,128,1,float16,float16,8191,0.09150400161743164
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,4,128,1,float16,float16,1,0.020563200116157532
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,4,128,1,float16,fp8,1,0.02140959948301315
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,4,128,1,float16,float16,3,0.02073120027780533
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,4,128,1,float16,fp8,3,0.021503999829292297
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,4,128,1,float16,float16,7,0.020846399664878845
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,4,128,1,float16,float16,15,0.02059199959039688
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,4,128,1,float16,fp8,7,0.02168000042438507
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,4,128,1,float16,fp8,15,0.02146400064229965
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,4,128,1,float16,float16,31,0.020766399800777435
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,4,128,1,float16,fp8,31,0.021411199867725373
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,4,128,1,float16,float16,63,0.020577600598335265
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,4,128,1,float16,fp8,63,0.02146719992160797
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,4,128,1,float16,float16,127,0.020716799795627593
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,4,128,1,float16,fp8,127,0.021827200055122377
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,4,128,1,float16,float16,255,0.022575999796390533
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,4,128,1,float16,fp8,255,0.023662400245666505
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,4,128,1,float16,float16,511,0.027856001257896425
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,4,128,1,float16,fp8,511,0.027632001042366027
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,4,128,1,float16,float16,1023,0.040115201473236085
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,4,128,1,float16,fp8,1023,0.03533760011196137
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,4,128,1,float16,float16,2047,0.05615519881248474
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,4,128,1,float16,fp8,2047,0.05432159900665283
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,4,128,1,float16,float16,4095,0.08981760144233704
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,4,128,1,float16,fp8,4095,0.08018720149993896
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,4,128,1,float16,float16,8191,0.15178560018539428
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,4,128,1,float16,fp8,8191,0.13026880025863646
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,8,128,1,float16,float16,1,0.024383999407291412
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,8,128,1,float16,fp8,1,0.025966399908065797
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,8,128,1,float16,float16,3,0.024558399617671967
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,8,128,1,float16,fp8,3,0.026134398579597474
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,8,128,1,float16,float16,7,0.024553599953651428
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,8,128,1,float16,fp8,7,0.02614560127258301
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,8,128,1,float16,float16,15,0.024579200148582458
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,8,128,1,float16,fp8,15,0.02621760070323944
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,8,128,1,float16,float16,31,0.024427199363708497
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,8,128,1,float16,fp8,31,0.026111999154090883
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,8,128,1,float16,float16,63,0.024673600494861603
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,8,128,1,float16,float16,511,0.04035359919071198
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,8,128,1,float16,fp8,63,0.026236799359321595
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,8,128,1,float16,float16,127,0.024633599817752837
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,8,128,1,float16,fp8,127,0.02619520127773285
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,8,128,1,float16,float16,255,0.028644800186157227
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,8,128,1,float16,fp8,255,0.030191999673843384
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,8,128,1,float16,fp8,511,0.038155201077461245
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,8,128,1,float16,float16,1023,0.05877760052680969
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,8,128,1,float16,fp8,1023,0.05836319923400879
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,8,128,1,float16,float16,2047,0.09055039882659913
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,8,128,1,float16,fp8,2047,0.08348159790039063
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,8,128,1,float16,float16,4095,0.153711998462677
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,8,128,1,float16,fp8,4095,0.13531839847564697
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,8,128,1,float16,float16,8191,0.2732431888580322
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,1,128,1,float16,float16,1,0.018937599658966065
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,32,8,128,1,float16,fp8,8191,0.23639519214630128
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,1,128,1,float16,fp8,1,0.019657599925994872
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,1,128,1,float16,float16,3,0.018822400271892546
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,1,128,1,float16,float16,31,0.018963199853897095
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,1,128,1,float16,fp8,3,0.01995519995689392
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,1,128,1,float16,float16,7,0.019222399592399596
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,1,128,1,float16,fp8,7,0.01985439956188202
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,1,128,1,float16,float16,15,0.018964800238609313
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,1,128,1,float16,fp8,15,0.019920000433921815
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,1,128,1,float16,fp8,31,0.020003199577331543
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,1,128,1,float16,float16,63,0.01915840059518814
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,1,128,1,float16,fp8,63,0.019980800151824952
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,1,128,1,float16,float16,127,0.019036799669265747
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,1,128,1,float16,fp8,127,0.019937600195407867
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,1,128,1,float16,float16,255,0.02064319998025894
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,1,128,1,float16,fp8,255,0.021692800521850585
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,1,128,1,float16,float16,511,0.02385440021753311
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,1,128,1,float16,fp8,511,0.02555519938468933
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,1,128,1,float16,float16,1023,0.023387199640274046
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,1,128,1,float16,fp8,1023,0.024745599925518037
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,1,128,1,float16,float16,2047,0.02377600073814392
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,1,128,1,float16,fp8,2047,0.024991999566555022
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,1,128,1,float16,float16,4095,0.024899199604988098
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,1,128,1,float16,fp8,4095,0.025854399800300597
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,1,128,1,float16,float16,8191,0.026899200677871705
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,1,128,1,float16,fp8,8191,0.02746880054473877
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,2,128,1,float16,float16,1,0.01899999976158142
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,2,128,1,float16,float16,15,0.019198399782180787
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,2,128,1,float16,fp8,1,0.019993600249290467
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,2,128,1,float16,float16,3,0.01910399943590164
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,2,128,1,float16,fp8,3,0.019648000597953796
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,2,128,1,float16,float16,7,0.019032000005245207
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,2,128,1,float16,fp8,7,0.019679999351501463
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,2,128,1,float16,fp8,15,0.019897599518299103
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,2,128,1,float16,float16,31,0.019105599820613862
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,2,128,1,float16,fp8,31,0.0200095996260643
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,2,128,1,float16,float16,63,0.019444799423217772
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,2,128,1,float16,fp8,63,0.019812799990177155
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,2,128,1,float16,float16,127,0.01926559954881668
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,2,128,1,float16,fp8,127,0.019838400185108185
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,2,128,1,float16,float16,255,0.020556800067424774
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,2,128,1,float16,fp8,255,0.022247999906539917
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,2,128,1,float16,float16,511,0.023953600227832793
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,2,128,1,float16,fp8,511,0.025390401482582092
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,2,128,1,float16,float16,1023,0.023659199476242065
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,2,128,1,float16,fp8,1023,0.024728000164031982
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,2,128,1,float16,float16,2047,0.024539199471473695
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,2,128,1,float16,fp8,2047,0.025360000133514405
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,2,128,1,float16,float16,4095,0.02632479965686798
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,2,128,1,float16,fp8,4095,0.02656799852848053
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,2,128,1,float16,float16,8191,0.029783999919891356
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,2,128,1,float16,fp8,8191,0.030292800068855284
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,4,128,1,float16,float16,1,0.01912959963083267
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,4,128,1,float16,fp8,1,0.019889600574970245
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,4,128,1,float16,float16,3,0.019169600307941438
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,4,128,1,float16,fp8,3,0.020108799636363982
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,4,128,1,float16,float16,7,0.019392000138759614
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,4,128,1,float16,fp8,7,0.019871999323368073
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,4,128,1,float16,float16,15,0.01907680034637451
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,4,128,1,float16,fp8,15,0.019966399669647215
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,4,128,1,float16,float16,31,0.019270400702953338
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,4,128,1,float16,fp8,31,0.01998720020055771
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,4,128,1,float16,float16,63,0.019403199851512908
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,4,128,1,float16,fp8,63,0.020102399587631225
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,4,128,1,float16,float16,1023,0.02369920015335083
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,4,128,1,float16,float16,127,0.019094400107860565
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,4,128,1,float16,fp8,127,0.020175999402999877
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,4,128,1,float16,float16,255,0.021073600649833678
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,4,128,1,float16,fp8,255,0.0220880001783371
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,4,128,1,float16,float16,511,0.023758399486541747
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,4,128,1,float16,fp8,511,0.02592960000038147
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,4,128,1,float16,fp8,1023,0.025076800584793092
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,4,128,1,float16,float16,2047,0.025470399856567384
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,4,128,1,float16,fp8,2047,0.025995200872421263
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,4,128,1,float16,float16,4095,0.029182401299476624
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,4,128,1,float16,fp8,4095,0.029603201150894164
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,4,128,1,float16,float16,8191,0.04013279974460602
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,4,128,1,float16,fp8,8191,0.035892799496650696
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,8,128,1,float16,float16,1,0.01924320012331009
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,8,128,1,float16,fp8,1,0.019950400292873382
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,8,128,1,float16,float16,3,0.019619199633598327
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,8,128,1,float16,fp8,3,0.020563200116157532
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,8,128,1,float16,float16,7,0.019256000220775605
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,8,128,1,float16,fp8,7,0.020027199387550355
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,8,128,1,float16,float16,15,0.019540800154209136
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,8,128,1,float16,fp8,15,0.020180800557136537
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,8,128,1,float16,float16,31,0.01942239999771118
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,8,128,1,float16,fp8,31,0.020294399559497835
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,8,128,1,float16,float16,63,0.019480000436306
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,8,128,1,float16,fp8,63,0.020478400588035583
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,8,128,1,float16,float16,127,0.01958879977464676
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,8,128,1,float16,fp8,127,0.02030719965696335
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,8,128,1,float16,float16,255,0.021104000508785248
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,8,128,1,float16,fp8,255,0.022169600427150726
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,8,128,1,float16,float16,511,0.024171200394630433
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,8,128,1,float16,fp8,511,0.026177600026130676
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,8,128,1,float16,float16,1023,0.025679999589920045
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,8,128,1,float16,fp8,1023,0.02590720057487488
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,8,128,1,float16,float16,2047,0.02768160104751587
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,8,128,1,float16,fp8,2047,0.029531198740005492
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,8,128,1,float16,float16,4095,0.039705601334571836
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,1,128,1,float16,float16,3,0.023119999468326567
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,8,128,1,float16,fp8,4095,0.035392001271247864
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,8,128,1,float16,float16,8191,0.05583840012550354
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,32,8,128,1,float16,fp8,8191,0.05378400087356568
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,1,128,1,float16,float16,1,0.022996799647808076
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,1,128,1,float16,fp8,1,0.023772799968719484
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,1,128,1,float16,fp8,3,0.023479999601840974
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,1,128,1,float16,float16,7,0.022750400006771088
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,1,128,1,float16,fp8,7,0.02404160052537918
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,1,128,1,float16,float16,15,0.02309119999408722
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,1,128,1,float16,fp8,15,0.023904000222682954
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,1,128,1,float16,float16,31,0.02337760031223297
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,1,128,1,float16,fp8,31,0.024113599956035615
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,1,128,1,float16,float16,63,0.02268480062484741
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,1,128,1,float16,fp8,63,0.023871999979019166
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,1,128,1,float16,float16,127,0.023393599689006804
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,1,128,1,float16,fp8,127,0.023756800591945647
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,1,128,1,float16,float16,255,0.024886399507522583
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,1,128,1,float16,fp8,255,0.026035198569297792
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,1,128,1,float16,float16,511,0.0280239999294281
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,1,128,1,float16,fp8,2047,0.05578879714012146
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,1,128,1,float16,fp8,511,0.029315200448036195
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,1,128,1,float16,float16,1023,0.043761599063873294
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,1,128,1,float16,fp8,1023,0.038027200102806094
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,1,128,1,float16,float16,2047,0.06030240058898926
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,1,128,1,float16,float16,4095,0.09259999990463257
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,1,128,1,float16,fp8,4095,0.08181759715080261
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,2,128,1,float16,float16,1,0.026855999231338502
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,2,128,1,float16,fp8,1,0.028230398893356323
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,2,128,1,float16,float16,3,0.026782399415969847
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,2,128,1,float16,fp8,3,0.028467199206352232
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,2,128,1,float16,float16,7,0.026859200000762938
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,2,128,1,float16,fp8,7,0.02834239900112152
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,2,128,1,float16,float16,15,0.026684799790382387
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,2,128,1,float16,fp8,15,0.028364801406860353
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,2,128,1,float16,float16,31,0.02667199969291687
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,2,128,1,float16,fp8,31,0.028411200642585753
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,2,128,1,float16,float16,63,0.02677600085735321
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,2,128,1,float16,fp8,63,0.028567999601364136
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,2,128,1,float16,float16,127,0.026649600267410277
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,2,128,1,float16,fp8,127,0.028779199719429015
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,2,128,1,float16,float16,255,0.03043360114097595
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,2,128,1,float16,fp8,255,0.03225440084934235
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,2,128,1,float16,float16,511,0.044249600172042845
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,2,128,1,float16,fp8,511,0.04014399945735932
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,2,128,1,float16,float16,1023,0.0625711977481842
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,2,128,1,float16,fp8,1023,0.06151360273361206
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,2,128,1,float16,float16,2047,0.09616159796714782
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,2,128,1,float16,fp8,2047,0.08530719876289368
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,2,128,1,float16,float16,4095,0.15544320344924928
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,2,128,1,float16,fp8,4095,0.13664959669113158
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,4,128,1,float16,float16,1,0.03448480069637298
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,4,128,1,float16,fp8,1,0.03746399879455566
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,4,128,1,float16,float16,3,0.03439840078353882
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,4,128,1,float16,fp8,3,0.03747679889202118
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,4,128,1,float16,float16,7,0.03442240059375763
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,4,128,1,float16,fp8,7,0.037467199563980105
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,4,128,1,float16,float16,15,0.03439359962940216
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,4,128,1,float16,fp8,15,0.03775680065155029
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,4,128,1,float16,float16,31,0.03470560014247894
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,4,128,1,float16,fp8,31,0.03773599863052368
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,4,128,1,float16,float16,63,0.03462719917297363
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,4,128,1,float16,fp8,63,0.037371200323104856
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,4,128,1,float16,float16,127,0.03450399935245514
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,4,128,1,float16,fp8,127,0.03776800036430359
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,4,128,1,float16,float16,255,0.04987840056419372
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,4,128,1,float16,fp8,255,0.04631359875202179
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,4,128,1,float16,float16,511,0.06375679969787598
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,4,128,1,float16,fp8,511,0.06671680212020874
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,4,128,1,float16,float16,1023,0.09961599707603455
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,4,128,1,float16,fp8,1023,0.09414880275726319
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,4,128,1,float16,float16,2047,0.1609663963317871
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,4,128,1,float16,fp8,2047,0.1418720006942749
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,8,128,1,float16,float16,1,0.048747199773788455
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,4,128,1,float16,float16,4095,0.2798559904098511
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,8,128,1,float16,fp8,1,0.055576002597808837
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,4,128,1,float16,fp8,4095,0.2428352117538452
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,8,128,1,float16,float16,3,0.04864319860935211
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,8,128,1,float16,fp8,3,0.05531039834022522
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,8,128,1,float16,float16,7,0.049030399322509764
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,8,128,1,float16,fp8,7,0.055185598134994504
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,8,128,1,float16,float16,15,0.04900639951229095
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,8,128,1,float16,fp8,15,0.05544319748878479
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,8,128,1,float16,float16,31,0.04912160038948059
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,8,128,1,float16,fp8,31,0.05536159873008728
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,8,128,1,float16,float16,63,0.049804800748825075
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,8,128,1,float16,fp8,63,0.05537440180778504
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,8,128,1,float16,float16,127,0.05725600123405457
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,8,128,1,float16,fp8,127,0.05662239789962768
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,8,128,1,float16,float16,255,0.07308639883995056
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,8,128,1,float16,fp8,255,0.07774400115013122
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,8,128,1,float16,float16,511,0.10251840353012084
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,8,128,1,float16,fp8,511,0.10764000415802003
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,8,128,1,float16,float16,1023,0.16782079935073851
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,8,128,1,float16,fp8,1023,0.1594272017478943
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,8,128,1,float16,float16,2047,0.29023840427398684
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,8,128,1,float16,fp8,2047,0.25220799446105957
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,1,128,1,float16,float16,1,0.02990880012512207
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,1,128,1,float16,fp8,1,0.031699201464653014
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,8,128,1,float16,float16,4095,0.5448959827423095
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,32,8,128,1,float16,fp8,4095,0.4552271842956543
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,1,128,1,float16,float16,3,0.029905599355697633
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,1,128,1,float16,fp8,3,0.031225600838661195
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,1,128,1,float16,float16,7,0.03022400140762329
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,1,128,1,float16,fp8,7,0.03123359978199005
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,1,128,1,float16,float16,15,0.029976001381874083
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,1,128,1,float16,fp8,15,0.031462401151657104
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,1,128,1,float16,float16,31,0.029953598976135254
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,1,128,1,float16,fp8,31,0.031799998879432675
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,1,128,1,float16,float16,63,0.029702401161193846
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,1,128,1,float16,fp8,63,0.03125919997692108
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,1,128,1,float16,float16,127,0.029819199442863466
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,1,128,1,float16,fp8,127,0.031462401151657104
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,1,128,1,float16,float16,255,0.0331712007522583
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,1,128,1,float16,fp8,255,0.03557760119438171
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,1,128,1,float16,float16,511,0.048860800266265866
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,1,128,1,float16,fp8,511,0.044968000054359435
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,1,128,1,float16,float16,1023,0.06761919856071472
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,1,128,1,float16,fp8,1023,0.0662608027458191
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,2,128,1,float16,float16,1,0.037303999066352844
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,2,128,1,float16,fp8,1,0.04032799899578095
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,2,128,1,float16,float16,3,0.03718400001525879
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,2,128,1,float16,fp8,3,0.040659201145172116
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,2,128,1,float16,float16,7,0.03724479973316193
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,2,128,1,float16,fp8,7,0.04039680063724518
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,2,128,1,float16,float16,15,0.03709119856357575
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,2,128,1,float16,fp8,15,0.04043999910354614
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,2,128,1,float16,float16,31,0.03739359974861145
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,2,128,1,float16,fp8,31,0.040310400724411014
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,2,128,1,float16,float16,63,0.03773599863052368
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,2,128,1,float16,fp8,63,0.04095520079135895
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,2,128,1,float16,float16,127,0.038699200749397276
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,2,128,1,float16,fp8,127,0.04031839966773987
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,2,128,1,float16,float16,255,0.05383679866790771
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,2,128,1,float16,fp8,255,0.04873920083045959
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,2,128,1,float16,float16,511,0.0687391996383667
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,2,128,1,float16,fp8,511,0.070033597946167
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,2,128,1,float16,float16,1023,0.1043984055519104
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,2,128,1,float16,fp8,1023,0.09836159944534302
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,4,128,1,float16,float16,1,0.051844799518585206
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,4,128,1,float16,fp8,1,0.05824480056762695
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,4,128,1,float16,float16,3,0.052401602268218994
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,4,128,1,float16,fp8,3,0.058241599798202516
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,4,128,1,float16,float16,7,0.05248799920082092
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,4,128,1,float16,fp8,7,0.05847679972648621
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,4,128,1,float16,float16,15,0.05189120173454285
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,4,128,1,float16,fp8,15,0.05823519825935364
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,4,128,1,float16,float16,31,0.05204960107803345
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,4,128,1,float16,fp8,31,0.058590400218963626
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,4,128,1,float16,float16,63,0.05313119888305664
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,4,128,1,float16,fp8,63,0.058633601665496825
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,4,128,1,float16,float16,127,0.060382401943206786
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,4,128,1,float16,fp8,127,0.060089600086212155
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,4,128,1,float16,float16,255,0.07702720165252686
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,4,128,1,float16,fp8,255,0.08068000078201294
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,4,128,1,float16,float16,511,0.1068992018699646
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,4,128,1,float16,fp8,511,0.11052479743957519
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,4,128,1,float16,float16,1023,0.17348320484161378
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,8,128,1,float16,float16,1,0.08035039901733398
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,4,128,1,float16,fp8,1023,0.16456960439682006
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,8,128,1,float16,fp8,1,0.09363200068473816
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,8,128,1,float16,float16,3,0.08072959780693054
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,8,128,1,float16,fp8,3,0.09408640265464782
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,8,128,1,float16,float16,7,0.08064960241317749
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,8,128,1,float16,fp8,7,0.09408640265464782
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,8,128,1,float16,float16,15,0.0807856023311615
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,8,128,1,float16,fp8,15,0.09393759965896606
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,8,128,1,float16,float16,31,0.08129119873046875
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,8,128,1,float16,fp8,31,0.09371520280838012
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,8,128,1,float16,float16,63,0.08884000182151794
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,8,128,1,float16,fp8,63,0.0986303985118866
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,8,128,1,float16,float16,127,0.09147520065307617
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,8,128,1,float16,fp8,127,0.1052575945854187
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,8,128,1,float16,float16,255,0.12176159620285035
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,8,128,1,float16,fp8,255,0.1327839970588684
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,8,128,1,float16,float16,511,0.17754080295562744
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,8,128,1,float16,fp8,511,0.19033600091934205
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,8,128,1,float16,float16,1023,0.30753440856933595
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,1,128,1,float16,float16,1,0.04441120028495789
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,32,8,128,1,float16,fp8,1023,0.29328479766845705
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,1,128,1,float16,fp8,1,0.04758720099925995
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,1,128,1,float16,float16,3,0.0443695992231369
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,1,128,1,float16,fp8,3,0.04733920097351074
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,1,128,1,float16,float16,7,0.0443664014339447
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,1,128,1,float16,fp8,7,0.047435200214385985
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,1,128,1,float16,float16,15,0.0442656010389328
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,1,128,1,float16,fp8,15,0.047491198778152464
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,1,128,1,float16,float16,31,0.044468799233436586
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,1,128,1,float16,fp8,31,0.047628799080848695
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,1,128,1,float16,float16,63,0.044567999243736264
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,1,128,1,float16,fp8,63,0.04756959974765777
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,1,128,1,float16,float16,127,0.04676159918308258
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,1,128,1,float16,fp8,127,0.0475600004196167
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,1,128,1,float16,float16,255,0.06069440245628357
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,1,128,1,float16,fp8,255,0.058443200588226316
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,1,128,1,float16,float16,511,0.07675359845161438
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,1,128,1,float16,fp8,511,0.07699999809265137
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,2,128,1,float16,float16,1,0.058457601070404056
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,2,128,1,float16,fp8,1,0.0645632028579712
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,2,128,1,float16,float16,3,0.05873759984970093
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,2,128,1,float16,fp8,3,0.06439039707183838
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,2,128,1,float16,float16,7,0.058740800619125365
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,2,128,1,float16,fp8,7,0.0648144006729126
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,2,128,1,float16,float16,15,0.05888640284538269
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,2,128,1,float16,fp8,15,0.06463519930839538
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,2,128,1,float16,float16,31,0.058569598197937014
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,2,128,1,float16,fp8,31,0.06470720171928405
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,2,128,1,float16,float16,63,0.06276000142097474
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,2,128,1,float16,fp8,63,0.06455039978027344
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,2,128,1,float16,float16,127,0.06798239946365356
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,2,128,1,float16,fp8,127,0.068995201587677
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,2,128,1,float16,float16,255,0.08577759861946106
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,2,128,1,float16,fp8,255,0.08628960251808167
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,2,128,1,float16,float16,511,0.11460800170898437
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,2,128,1,float16,fp8,511,0.11807359457015991
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,4,128,1,float16,float16,1,0.08728320002555848
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,4,128,1,float16,fp8,1,0.09959359765052796
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,4,128,1,float16,float16,3,0.08708800077438354
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,4,128,1,float16,fp8,3,0.10008959770202637
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,4,128,1,float16,float16,7,0.08754240274429322
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,4,128,1,float16,fp8,7,0.09993280172348022
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,4,128,1,float16,float16,15,0.0874559998512268
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,4,128,1,float16,fp8,15,0.10067839622497558
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,4,128,1,float16,float16,31,0.08696320056915283
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,4,128,1,float16,fp8,31,0.10037440061569214
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,4,128,1,float16,float16,63,0.09497119784355164
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,4,128,1,float16,fp8,63,0.1043552041053772
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,4,128,1,float16,float16,127,0.09847840070724487
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,4,128,1,float16,fp8,127,0.11057920455932617
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,4,128,1,float16,float16,255,0.1303760051727295
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,4,128,1,float16,fp8,255,0.1380944013595581
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,4,128,1,float16,float16,511,0.1855407953262329
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,8,128,1,float16,float16,1,0.14221760034561157
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,4,128,1,float16,fp8,511,0.19922239780426027
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,8,128,1,float16,fp8,1,0.16941440105438232
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,8,128,1,float16,float16,3,0.14221279621124266
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,8,128,1,float16,fp8,3,0.16959999799728392
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,8,128,1,float16,float16,7,0.14133440256118773
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,8,128,1,float16,fp8,7,0.16979039907455445
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,8,128,1,float16,float16,15,0.14330559968948364
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,8,128,1,float16,fp8,15,0.17034879922866822
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,8,128,1,float16,float16,31,0.14954559803009032
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,8,128,1,float16,fp8,31,0.17027360200881958
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,8,128,1,float16,float16,63,0.1542688012123108
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,8,128,1,float16,fp8,63,0.1816704034805298
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,8,128,1,float16,float16,127,0.15895839929580688
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,8,128,1,float16,fp8,127,0.18668320178985595
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,8,128,1,float16,float16,255,0.2180095911026001
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,8,128,1,float16,fp8,255,0.24080960750579833
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,8,128,1,float16,float16,511,0.3311072111129761
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,1,128,1,float16,float16,1,0.019152000546455383
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,1,128,1,float16,fp8,1,0.019849599897861482
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,32,8,128,1,float16,fp8,511,0.35899200439453127
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,1,128,1,float16,float16,3,0.01913760006427765
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,1,128,1,float16,fp8,3,0.020095999538898467
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,1,128,1,float16,float16,7,0.019215999543666838
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,1,128,1,float16,fp8,7,0.020095999538898467
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,1,128,1,float16,float16,15,0.019424000382423402
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,1,128,1,float16,fp8,15,0.020139199495315552
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,1,128,1,float16,float16,31,0.019228799641132353
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,1,128,1,float16,fp8,31,0.020230400562286376
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,1,128,1,float16,float16,63,0.019387200474739075
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,1,128,1,float16,fp8,63,0.020262399315834047
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,1,128,1,float16,float16,127,0.019278399646282196
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,1,128,1,float16,fp8,127,0.019945600628852846
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,1,128,1,float16,float16,255,0.02112800031900406
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,1,128,1,float16,fp8,255,0.02216479927301407
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,1,128,1,float16,float16,511,0.0239424005150795
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,1,128,1,float16,fp8,511,0.025937598943710328
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,1,128,1,float16,float16,1023,0.023814399540424348
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,1,128,1,float16,fp8,1023,0.02494720071554184
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,1,128,1,float16,float16,2047,0.02447360008955002
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,1,128,1,float16,fp8,2047,0.025390401482582092
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,1,128,1,float16,float16,4095,0.026209598779678343
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,1,128,1,float16,fp8,4095,0.02645919919013977
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,1,128,1,float16,float16,8191,0.02982560098171234
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,1,128,1,float16,fp8,8191,0.03054879903793335
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,2,128,1,float16,float16,1,0.019403199851512908
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,2,128,1,float16,fp8,1,0.020032000541687012
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,2,128,1,float16,float16,3,0.01929280012845993
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,2,128,1,float16,fp8,3,0.02003680020570755
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,2,128,1,float16,float16,7,0.01947360038757324
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,2,128,1,float16,fp8,7,0.020190399885177613
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,2,128,1,float16,float16,15,0.01926079988479614
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,2,128,1,float16,fp8,15,0.02016319930553436
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,2,128,1,float16,float16,31,0.019313600659370423
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,2,128,1,float16,fp8,31,0.02008800059556961
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,2,128,1,float16,float16,63,0.019204799830913544
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,2,128,1,float16,fp8,63,0.020150400698184967
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,2,128,1,float16,float16,127,0.019489599764347075
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,2,128,1,float16,fp8,127,0.020168000459671022
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,2,128,1,float16,float16,255,0.021083199977874757
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,2,128,1,float16,fp8,255,0.02239519953727722
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,2,128,1,float16,float16,511,0.02412160038948059
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,2,128,1,float16,fp8,511,0.02590720057487488
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,2,128,1,float16,float16,1023,0.024094399809837342
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,2,128,1,float16,fp8,1023,0.025246399641036987
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,2,128,1,float16,float16,2047,0.025724801421165466
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,2,128,1,float16,fp8,2047,0.025993600487709045
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,2,128,1,float16,float16,4095,0.029339200258255003
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,2,128,1,float16,fp8,4095,0.029766398668289184
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,2,128,1,float16,float16,8191,0.04138559997081757
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,2,128,1,float16,fp8,8191,0.035815998911857605
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,4,128,1,float16,float16,1,0.019575999677181245
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,4,128,1,float16,fp8,1,0.02041279971599579
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,4,128,1,float16,float16,3,0.019463999569416045
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,4,128,1,float16,fp8,3,0.020420800149440765
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,4,128,1,float16,float16,7,0.019518400728702544
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,4,128,1,float16,fp8,7,0.02033119946718216
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,4,128,1,float16,float16,15,0.019756799936294554
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,4,128,1,float16,float16,31,0.019499200582504272
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,4,128,1,float16,fp8,31,0.020380799472332
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,4,128,1,float16,float16,63,0.019806399941444397
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,4,128,1,float16,fp8,63,0.02056480050086975
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,4,128,1,float16,float16,127,0.019628800451755524
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,4,128,1,float16,fp8,15,0.020444799959659577
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,4,128,1,float16,fp8,127,0.020396800339221956
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,4,128,1,float16,float16,255,0.021305599808692934
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,4,128,1,float16,fp8,255,0.022280000150203705
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,4,128,1,float16,float16,511,0.024952000379562377
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,4,128,1,float16,fp8,511,0.026443201303482055
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,4,128,1,float16,float16,1023,0.025563201308250426
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,4,128,1,float16,fp8,4095,0.03550240099430084
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,4,128,1,float16,fp8,1023,0.02590399980545044
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,4,128,1,float16,float16,2047,0.027641600370407103
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,4,128,1,float16,fp8,2047,0.029502400755882265
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,4,128,1,float16,float16,4095,0.040715199708938596
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,4,128,1,float16,float16,8191,0.0563696026802063
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,4,128,1,float16,fp8,8191,0.05375679731369019
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,8,128,1,float16,float16,1,0.019977599382400513
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,8,128,1,float16,fp8,1,0.020678399503231047
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,8,128,1,float16,float16,3,0.019780799746513367
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,8,128,1,float16,fp8,3,0.0207056000828743
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,8,128,1,float16,float16,7,0.019908800721168518
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,8,128,1,float16,fp8,7,0.020688000321388244
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,8,128,1,float16,float16,15,0.019681599736213685
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,8,128,1,float16,fp8,15,0.02064639925956726
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,8,128,1,float16,float16,31,0.019844800233840942
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,8,128,1,float16,fp8,31,0.020446400344371795
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,8,128,1,float16,float16,63,0.019867199659347533
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,8,128,1,float16,fp8,63,0.020638400316238405
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,8,128,1,float16,float16,127,0.019860799610614776
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,8,128,1,float16,fp8,127,0.02096800059080124
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,8,128,1,float16,float16,255,0.021631999313831328
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,8,128,1,float16,fp8,255,0.022702400386333466
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,8,128,1,float16,float16,511,0.024926400184631346
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,8,128,1,float16,fp8,511,0.02646079957485199
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,8,128,1,float16,float16,1023,0.027668800950050355
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,8,128,1,float16,fp8,1023,0.02963840067386627
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,8,128,1,float16,float16,2047,0.04072639942169189
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,8,128,1,float16,fp8,2047,0.03540480136871338
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,8,128,1,float16,float16,4095,0.056334400177001955
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,8,128,1,float16,fp8,4095,0.05408160090446472
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,8,128,1,float16,float16,8191,0.08947839736938476
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,32,8,128,1,float16,fp8,8191,0.07919999957084656
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,1,128,1,float16,float16,1,0.06604160070419311
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,1,128,1,float16,fp8,1,0.07220479846000671
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,1,128,1,float16,float16,3,0.06571840047836304
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,1,128,1,float16,fp8,3,0.0719968020915985
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,1,128,1,float16,float16,7,0.06650239825248719
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,1,128,1,float16,fp8,7,0.07222560048103333
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,1,128,1,float16,float16,15,0.06593919992446899
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,1,128,1,float16,fp8,15,0.07179999947547913
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,1,128,1,float16,float16,31,0.06611520051956177
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,1,128,1,float16,float16,255,0.09278079867362976
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,1,128,1,float16,fp8,31,0.07283679842948913
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,1,128,1,float16,float16,63,0.07225440144538879
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,1,128,1,float16,fp8,63,0.07261760234832763
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,1,128,1,float16,float16,127,0.0772704005241394
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,2,128,1,float16,fp8,3,0.10605119466781616
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,1,128,1,float16,fp8,127,0.07905759811401367
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,1,128,1,float16,fp8,255,0.09520639777183533
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,2,128,1,float16,float16,1,0.0946175992488861
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,2,128,1,float16,fp8,1,0.10622559785842896
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,2,128,1,float16,float16,3,0.09464160203933716
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,2,128,1,float16,float16,7,0.09416159987449646
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,2,128,1,float16,fp8,7,0.10671039819717407
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,2,128,1,float16,float16,15,0.09435200095176696
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,2,128,1,float16,fp8,15,0.10717920064926148
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,2,128,1,float16,float16,31,0.0972815990447998
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,2,128,1,float16,fp8,31,0.10696320533752442
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,2,128,1,float16,float16,63,0.10243680477142333
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,2,128,1,float16,fp8,63,0.11491999626159669
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,2,128,1,float16,float16,127,0.10656800270080566
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,2,128,1,float16,fp8,127,0.11744320392608643
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,2,128,1,float16,float16,255,0.13762240409851073
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,4,128,1,float16,float16,7,0.15065759420394897
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,2,128,1,float16,fp8,255,0.1450943946838379
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,4,128,1,float16,float16,1,0.15085439682006835
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,4,128,1,float16,fp8,1,0.17547520399093627
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,4,128,1,float16,float16,3,0.15052640438079834
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,4,128,1,float16,fp8,3,0.17639360427856446
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,4,128,1,float16,fp8,7,0.17607359886169432
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,4,128,1,float16,fp8,15,0.17528480291366577
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,4,128,1,float16,float16,15,0.15069119930267333
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,4,128,1,float16,float16,31,0.1589951992034912
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,4,128,1,float16,fp8,31,0.17906880378723145
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,4,128,1,float16,float16,63,0.1615023970603943
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,4,128,1,float16,fp8,63,0.18854880332946777
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,4,128,1,float16,float16,127,0.16548160314559937
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,4,128,1,float16,fp8,127,0.19237600564956664
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,4,128,1,float16,float16,255,0.22277600765228273
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,4,128,1,float16,fp8,255,0.24453599452972413
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,8,128,1,float16,float16,1,0.26208319664001467
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,8,128,1,float16,fp8,1,0.3119328022003174
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,8,128,1,float16,float16,3,0.2624783992767334
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,8,128,1,float16,fp8,3,0.31233921051025393
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,8,128,1,float16,float16,7,0.26207358837127687
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,8,128,1,float16,fp8,7,0.31353919506072997
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,8,128,1,float16,float16,15,0.2711551904678345
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,8,128,1,float16,fp8,15,0.3125648021697998
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,8,128,1,float16,float16,31,0.28019840717315675
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,8,128,1,float16,fp8,31,0.3356208086013794
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,8,128,1,float16,float16,63,0.28327839374542235
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,8,128,1,float16,float16,255,0.40218558311462405
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,8,128,1,float16,fp8,63,0.3363392114639282
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,8,128,1,float16,float16,127,0.29084799289703367
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,8,128,1,float16,fp8,127,0.3456032037734985
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,1,128,1,float16,float16,1,0.10765600204467773
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,32,8,128,1,float16,fp8,255,0.44669599533081056
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,1,128,1,float16,fp8,1,0.11886240243911743
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,1,128,1,float16,float16,3,0.10716960430145264
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,1,128,1,float16,fp8,3,0.11869920492172241
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,1,128,1,float16,float16,7,0.10793919563293457
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,1,128,1,float16,fp8,7,0.11913119554519654
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,1,128,1,float16,float16,15,0.11066399812698365
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,1,128,1,float16,fp8,15,0.11897920370101929
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,1,128,1,float16,float16,31,0.11480640172958374
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,1,128,1,float16,fp8,31,0.12169920206069947
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,1,128,1,float16,float16,63,0.11781920194625854
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,1,128,1,float16,fp8,63,0.1300431966781616
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,1,128,1,float16,float16,127,0.12216479778289795
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,1,128,1,float16,fp8,127,0.13196959495544433
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,2,128,1,float16,float16,1,0.16291680335998535
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,2,128,1,float16,fp8,1,0.1866495966911316
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,2,128,1,float16,float16,3,0.16384320259094237
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,2,128,1,float16,fp8,3,0.18624000549316405
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,2,128,1,float16,float16,7,0.16342079639434814
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,2,128,1,float16,fp8,7,0.1857184052467346
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,2,128,1,float16,float16,15,0.167086398601532
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,2,128,1,float16,fp8,15,0.1868880033493042
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,2,128,1,float16,float16,31,0.17201119661331177
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,2,128,1,float16,fp8,31,0.19446079730987548
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,2,128,1,float16,float16,63,0.17399040460586548
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,2,128,1,float16,fp8,63,0.20167360305786133
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,2,128,1,float16,float16,127,0.18016480207443236
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,2,128,1,float16,fp8,127,0.20291359424591066
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,4,128,1,float16,float16,1,0.27890400886535643
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,4,128,1,float16,fp8,1,0.327019190788269
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,4,128,1,float16,float16,3,0.2803040027618408
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,4,128,1,float16,fp8,3,0.32728960514068606
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,4,128,1,float16,float16,7,0.2843231916427612
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,4,128,1,float16,fp8,7,0.3275311946868896
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,4,128,1,float16,float16,15,0.2869136095046997
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,4,128,1,float16,fp8,15,0.33459839820861814
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,4,128,1,float16,float16,31,0.2882159948348999
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,4,128,1,float16,fp8,31,0.34508481025695803
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,4,128,1,float16,float16,63,0.2926896095275879
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,4,128,1,float16,fp8,63,0.3488912105560303
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,4,128,1,float16,float16,127,0.30128800868988037
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,4,128,1,float16,fp8,127,0.353272008895874
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,8,128,1,float16,float16,1,0.5163375854492187
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,8,128,1,float16,fp8,1,0.5987152099609375
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,8,128,1,float16,float16,3,0.5139455795288086
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,8,128,1,float16,fp8,3,0.6043263912200928
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,8,128,1,float16,float16,7,0.5167056083679199
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,8,128,1,float16,fp8,7,0.6071023941040039
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,8,128,1,float16,float16,15,0.5221231937408447
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,8,128,1,float16,fp8,15,0.6397039890289307
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,8,128,1,float16,float16,31,0.5248591899871826
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,8,128,1,float16,fp8,31,0.6373343944549561
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,8,128,1,float16,float16,63,0.5279407978057862
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,1,128,1,float16,float16,1,0.019208000600337984
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,8,128,1,float16,fp8,63,0.641371202468872
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,1,128,1,float16,fp8,1,0.020267200469970704
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,8,128,1,float16,float16,127,0.5446928024291993
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,1,128,1,float16,float16,3,0.01924159973859787
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,32,8,128,1,float16,fp8,127,0.6454495906829834
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,1,128,1,float16,fp8,3,0.02056799978017807
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,1,128,1,float16,float16,7,0.019625599682331085
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,1,128,1,float16,fp8,7,0.020392000675201416
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,1,128,1,float16,float16,15,0.019444799423217772
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,1,128,1,float16,fp8,15,0.02038560062646866
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,1,128,1,float16,float16,31,0.019470399618148802
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,1,128,1,float16,fp8,31,0.020203199982643128
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,1,128,1,float16,float16,63,0.01932159960269928
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,1,128,1,float16,fp8,63,0.020136000216007234
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,1,128,1,float16,float16,127,0.01945279985666275
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,1,128,1,float16,fp8,127,0.020374399423599244
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,1,128,1,float16,float16,255,0.02125920057296753
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,1,128,1,float16,fp8,255,0.022388799488544463
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,1,128,1,float16,float16,511,0.02414720058441162
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,1,128,1,float16,fp8,511,0.026497599482536317
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,1,128,1,float16,float16,1023,0.024196800589561463
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,1,128,1,float16,fp8,1023,0.025284799933433532
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,1,128,1,float16,float16,2047,0.02598559856414795
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,1,128,1,float16,fp8,2047,0.026214399933815004
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,1,128,1,float16,float16,4095,0.029502400755882265
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,1,128,1,float16,fp8,4095,0.029543998837471008
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,1,128,1,float16,float16,8191,0.04183039963245392
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,1,128,1,float16,fp8,8191,0.036025598645210266
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,2,128,1,float16,float16,1,0.01969279944896698
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,2,128,1,float16,fp8,1,0.020390400290489198
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,2,128,1,float16,float16,3,0.019457599520683287
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,2,128,1,float16,fp8,3,0.020494399964809416
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,2,128,1,float16,float16,7,0.019601599872112276
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,2,128,1,float16,fp8,7,0.02041600048542023
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,2,128,1,float16,float16,15,0.019592000544071196
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,2,128,1,float16,fp8,15,0.020875200629234314
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,2,128,1,float16,float16,31,0.019446399807929993
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,2,128,1,float16,fp8,31,0.02029920071363449
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,2,128,1,float16,float16,63,0.01958400011062622
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,2,128,1,float16,fp8,63,0.020340800285339355
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,2,128,1,float16,float16,127,0.019630399346351624
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,2,128,1,float16,fp8,127,0.0204815998673439
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,2,128,1,float16,float16,255,0.021347199380397797
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,2,128,1,float16,fp8,255,0.02248159945011139
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,2,128,1,float16,float16,511,0.02428800016641617
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,2,128,1,float16,fp8,511,0.02605760097503662
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,2,128,1,float16,float16,1023,0.025879999995231627
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,2,128,1,float16,fp8,1023,0.026038399338722228
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,2,128,1,float16,float16,2047,0.027953600883483885
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,2,128,1,float16,fp8,2047,0.02949439883232117
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,2,128,1,float16,float16,4095,0.041550400853157046
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,2,128,1,float16,fp8,4095,0.03548319935798645
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,2,128,1,float16,float16,8191,0.05711680054664612
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,2,128,1,float16,fp8,8191,0.0541920006275177
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,4,128,1,float16,float16,1,0.01974399983882904
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,4,128,1,float16,fp8,1,0.020659199357032774
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,4,128,1,float16,float16,3,0.020017600059509276
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,4,128,1,float16,fp8,3,0.020937600731849672
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,4,128,1,float16,float16,7,0.019708800315856933
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,4,128,1,float16,fp8,7,0.020929600298404693
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,4,128,1,float16,float16,15,0.019867199659347533
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,4,128,1,float16,fp8,15,0.020686399936676026
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,4,128,1,float16,float16,31,0.019833600521087645
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,4,128,1,float16,fp8,31,0.0207056000828743
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,4,128,1,float16,float16,63,0.019776000082492827
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,4,128,1,float16,fp8,63,0.020670400559902193
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,4,128,1,float16,float16,127,0.019894400238990785
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,4,128,1,float16,fp8,127,0.02089280039072037
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,4,128,1,float16,float16,255,0.02168480008840561
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,4,128,1,float16,fp8,255,0.022720000147819518
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,4,128,1,float16,float16,511,0.025019198656082153
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,4,128,1,float16,fp8,511,0.026678401231765746
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,4,128,1,float16,float16,1023,0.027801600098609925
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,4,128,1,float16,fp8,1023,0.029558399319648744
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,4,128,1,float16,float16,2047,0.040889599919319154
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,4,128,1,float16,fp8,2047,0.03575839996337891
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,4,128,1,float16,float16,4095,0.05670560002326965
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,4,128,1,float16,fp8,4095,0.05488640069961548
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,4,128,1,float16,float16,8191,0.09027519822120667
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,8,128,1,float16,float16,1,0.020043200254440306
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,4,128,1,float16,fp8,8191,0.07941600084304809
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,8,128,1,float16,fp8,1,0.021035200357437132
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,8,128,1,float16,float16,3,0.02011680006980896
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,8,128,1,float16,fp8,3,0.020868800580501556
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,8,128,1,float16,float16,7,0.020275199413299562
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,8,128,1,float16,fp8,7,0.02128639966249466
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,8,128,1,float16,float16,15,0.02034880071878433
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,8,128,1,float16,fp8,15,0.021163199841976166
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,8,128,1,float16,float16,31,0.02017119973897934
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,8,128,1,float16,fp8,31,0.020983999967575072
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,8,128,1,float16,float16,63,0.020316800475120543
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,8,128,1,float16,fp8,63,0.021083199977874757
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,8,128,1,float16,float16,127,0.02029920071363449
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,8,128,1,float16,fp8,127,0.021371200680732727
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,8,128,1,float16,float16,255,0.02226240038871765
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,8,128,1,float16,fp8,255,0.02324319928884506
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,8,128,1,float16,float16,511,0.02539840042591095
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,8,128,1,float16,fp8,511,0.026995199918746948
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,8,128,1,float16,float16,1023,0.039540800452232364
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,8,128,1,float16,fp8,1023,0.035104000568389894
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,8,128,1,float16,float16,2047,0.05584319829940796
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,8,128,1,float16,fp8,2047,0.05435360074043274
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,8,128,1,float16,float16,4095,0.08838719725608826
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,8,128,1,float16,fp8,4095,0.0797872006893158
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,8,128,1,float16,float16,8191,0.148579204082489
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,32,8,128,1,float16,fp8,8191,0.13117599487304688
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,1,128,1,float16,float16,1,0.02098720073699951
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,1,128,1,float16,fp8,1,0.021779200434684752
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,1,128,1,float16,float16,3,0.021193599700927733
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,1,128,1,float16,fp8,3,0.021991999447345735
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,1,128,1,float16,float16,7,0.020873600244522096
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,1,128,1,float16,fp8,7,0.021807999908924104
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,1,128,1,float16,float16,15,0.02083200067281723
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,1,128,1,float16,fp8,15,0.021592000126838685
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,1,128,1,float16,float16,255,0.022912000119686127
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,1,128,1,float16,float16,31,0.021132799983024596
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,1,128,1,float16,fp8,31,0.021766400337219237
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,1,128,1,float16,float16,63,0.02102559953927994
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,1,128,1,float16,fp8,63,0.02172800004482269
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,1,128,1,float16,float16,127,0.020843200385570526
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,1,128,1,float16,fp8,127,0.021779200434684752
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,1,128,1,float16,fp8,255,0.0238319993019104
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,1,128,1,float16,float16,511,0.025803199410438536
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,1,128,1,float16,fp8,511,0.0274944007396698
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,1,128,1,float16,float16,1023,0.02903839945793152
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,1,128,1,float16,fp8,1023,0.03049120008945465
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,1,128,1,float16,float16,2047,0.04248799979686737
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,1,128,1,float16,fp8,2047,0.036855998635292056
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,1,128,1,float16,float16,4095,0.058361601829528806
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,1,128,1,float16,fp8,4095,0.05569599866867066
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,1,128,1,float16,float16,8191,0.09191679954528809
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,1,128,1,float16,fp8,8191,0.08033279776573181
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,1,128,1,float16,float16,16383,0.15369759798049926
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,2,128,1,float16,float16,1,0.02134400010108948
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,1,128,1,float16,fp8,16383,0.13321919441223146
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,2,128,1,float16,fp8,1,0.022270399332046508
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,2,128,1,float16,float16,3,0.02151840031147003
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,2,128,1,float16,fp8,3,0.022044800221920013
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,2,128,1,float16,float16,7,0.02163040041923523
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,2,128,1,float16,fp8,7,0.022247999906539917
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,2,128,1,float16,float16,15,0.02117439955472946
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,2,128,1,float16,fp8,15,0.02221439927816391
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,2,128,1,float16,float16,31,0.021355199813842773
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,2,128,1,float16,fp8,31,0.022275200486183165
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,2,128,1,float16,float16,63,0.021355199813842773
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,2,128,1,float16,fp8,63,0.022356800734996796
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,2,128,1,float16,float16,127,0.02130720019340515
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,2,128,1,float16,fp8,127,0.022467200458049775
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,2,128,1,float16,float16,255,0.023206399381160737
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,2,128,1,float16,fp8,255,0.02439039945602417
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,2,128,1,float16,float16,511,0.026446399092674256
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,2,128,1,float16,fp8,511,0.02802560031414032
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,2,128,1,float16,float16,1023,0.041457599401474
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,2,128,1,float16,fp8,1023,0.03550559878349304
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,2,128,1,float16,float16,2047,0.057175999879837035
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,2,128,1,float16,fp8,2047,0.05464159846305847
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,2,128,1,float16,float16,4095,0.09041759967803956
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,2,128,1,float16,fp8,4095,0.07990400195121765
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,2,128,1,float16,float16,8191,0.15053919553756714
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,2,128,1,float16,fp8,8191,0.13227360248565673
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,2,128,1,float16,float16,16383,0.27324159145355226
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,4,128,1,float16,float16,1,0.025124800205230714
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,4,128,1,float16,fp8,1,0.026825600862503053
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,4,128,1,float16,fp8,7,0.026582399010658266
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,2,128,1,float16,fp8,16383,0.2312688112258911
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,4,128,1,float16,float16,3,0.025388801097869874
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,4,128,1,float16,fp8,3,0.026804798841476442
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,4,128,1,float16,float16,7,0.025135999917984007
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,4,128,1,float16,float16,15,0.025243198871612547
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,4,128,1,float16,fp8,15,0.026894399523735048
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,4,128,1,float16,float16,31,0.025374400615692138
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,4,128,1,float16,fp8,31,0.0267984002828598
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,4,128,1,float16,float16,63,0.0253248006105423
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,4,128,1,float16,fp8,63,0.026888000965118408
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,4,128,1,float16,float16,127,0.025254398584365845
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,4,128,1,float16,fp8,127,0.026907199621200563
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,4,128,1,float16,float16,255,0.028859201073646545
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,4,128,1,float16,fp8,255,0.030843201279640197
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,4,128,1,float16,float16,511,0.04199039936065674
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,4,128,1,float16,fp8,511,0.04081760048866272
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,4,128,1,float16,float16,1023,0.0600928008556366
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,4,128,1,float16,fp8,1023,0.05954239964485168
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,4,128,1,float16,float16,2047,0.09311839938163757
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,4,128,1,float16,fp8,2047,0.08366720080375671
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,4,128,1,float16,float16,4095,0.15436480045318604
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,4,128,1,float16,fp8,4095,0.1352463960647583
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,4,128,1,float16,float16,8191,0.2755104064941406
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,4,128,1,float16,fp8,8191,0.23414239883422852
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,8,128,1,float16,float16,1,0.03278079926967621
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,8,128,1,float16,fp8,1,0.03590399920940399
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,8,128,1,float16,float16,3,0.0327919989824295
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,8,128,1,float16,fp8,3,0.036051198840141296
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,4,128,1,float16,float16,16383,0.517793607711792
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,4,128,1,float16,fp8,16383,0.43295841217041015
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,8,128,1,float16,float16,7,0.032790398597717284
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,8,128,1,float16,fp8,7,0.03581439852714539
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,8,128,1,float16,float16,15,0.03287360072135925
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,8,128,1,float16,fp8,15,0.03589600026607513
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,8,128,1,float16,float16,31,0.03265759944915771
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,8,128,1,float16,fp8,31,0.03575679957866669
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,8,128,1,float16,float16,63,0.03296000063419342
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,8,128,1,float16,fp8,63,0.03601279854774475
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,8,128,1,float16,float16,127,0.03355680108070373
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,8,128,1,float16,fp8,127,0.03594079911708832
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,8,128,1,float16,float16,255,0.04752480089664459
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,8,128,1,float16,fp8,255,0.044284799695014955
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,8,128,1,float16,float16,511,0.06086559891700745
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,8,128,1,float16,fp8,511,0.06515840291976929
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,8,128,1,float16,float16,4095,0.28150238990783694
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,8,128,1,float16,float16,1023,0.0958079993724823
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,8,128,1,float16,fp8,1023,0.09267359972000122
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,8,128,1,float16,float16,2047,0.15744800567626954
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,8,128,1,float16,fp8,2047,0.14125440120697022
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,8,128,1,float16,fp8,4095,0.24199841022491456
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,8,128,1,float16,float16,8191,0.5305568218231201
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,1,128,1,float16,float16,1,0.01641920059919357
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,1,128,1,float16,fp8,1,0.01700959950685501
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,1,128,1,float16,float16,7,0.016468800604343414
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,8,128,1,float16,fp8,8191,0.44121599197387695
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,1,128,1,float16,float16,3,0.016395199298858642
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,1,128,1,float16,fp8,3,0.017297600209712983
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,1,128,1,float16,fp8,7,0.01735839992761612
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,8,128,1,float16,float16,16383,1.052017593383789
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,16,8,128,1,float16,fp8,16383,0.8320128440856933
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,1,128,1,float16,float16,15,0.016443200409412384
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,1,128,1,float16,fp8,15,0.017340800166130065
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,1,128,1,float16,fp8,127,0.017046399414539337
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,1,128,1,float16,float16,31,0.016359999775886536
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,1,128,1,float16,fp8,31,0.017772799730300902
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,1,128,1,float16,float16,63,0.016279999911785126
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,1,128,1,float16,fp8,63,0.017505599558353423
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,1,128,1,float16,float16,127,0.018540799617767334
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,1,128,1,float16,float16,255,0.018222400546073915
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,1,128,1,float16,fp8,255,0.019148799777030944
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,1,128,1,float16,float16,511,0.020584000647068022
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,1,128,1,float16,fp8,511,0.023209600150585173
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,1,128,1,float16,float16,1023,0.020427200198173522
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,1,128,1,float16,fp8,1023,0.021935999393463135
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,1,128,1,float16,float16,2047,0.020707200467586517
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,1,128,1,float16,float16,16383,0.02519200146198273
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,1,128,1,float16,fp8,2047,0.0221903994679451
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,1,128,1,float16,float16,4095,0.0216048002243042
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,1,128,1,float16,fp8,4095,0.02263679951429367
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,1,128,1,float16,float16,8191,0.023099200427532197
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,1,128,1,float16,fp8,8191,0.02412319928407669
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,1,128,1,float16,fp8,16383,0.026188799738883974
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,2,128,1,float16,float16,1,0.01738079935312271
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,2,128,1,float16,fp8,1,0.01804479956626892
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,2,128,1,float16,float16,3,0.01706400066614151
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,2,128,1,float16,fp8,3,0.01797119975090027
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,2,128,1,float16,float16,7,0.01688639968633652
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,2,128,1,float16,fp8,7,0.01797440052032471
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,2,128,1,float16,float16,15,0.01722240000963211
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,2,128,1,float16,fp8,15,0.018161599338054658
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,2,128,1,float16,float16,31,0.01717599928379059
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,2,128,1,float16,fp8,31,0.018033599853515624
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,2,128,1,float16,float16,63,0.01720480024814606
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,2,128,1,float16,fp8,63,0.01783200055360794
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,2,128,1,float16,float16,127,0.017161600291728973
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,2,128,1,float16,fp8,127,0.018078400194644927
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,2,128,1,float16,float16,255,0.018628799915313722
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,2,128,1,float16,fp8,255,0.02027200013399124
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,2,128,1,float16,float16,511,0.02187040001153946
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,2,128,1,float16,fp8,511,0.023865599930286408
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,2,128,1,float16,float16,1023,0.021214400231838227
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,2,128,1,float16,fp8,1023,0.022580799460411072
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,2,128,1,float16,float16,2047,0.021265600621700288
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,2,128,1,float16,fp8,2047,0.022759999334812164
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,2,128,1,float16,float16,4095,0.022819200158119203
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,2,128,1,float16,fp8,4095,0.023427200317382813
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,2,128,1,float16,float16,8191,0.02372319996356964
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,2,128,1,float16,fp8,8191,0.024823999404907225
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,2,128,1,float16,float16,16383,0.026383998990058898
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,2,128,1,float16,fp8,16383,0.027420800924301148
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,4,128,1,float16,float16,1,0.01879359930753708
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,4,128,1,float16,fp8,1,0.019684800505638124
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,4,128,1,float16,fp8,15,0.01983039975166321
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,4,128,1,float16,float16,3,0.018628799915313722
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,4,128,1,float16,fp8,3,0.019491200149059296
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,4,128,1,float16,float16,7,0.018719999492168425
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,4,128,1,float16,fp8,7,0.019811199605464937
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,4,128,1,float16,float16,15,0.018947200477123262
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,4,128,1,float16,float16,31,0.018854400515556334
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,4,128,1,float16,fp8,31,0.01974720060825348
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,4,128,1,float16,float16,63,0.01895360052585602
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,4,128,1,float16,fp8,63,0.019625599682331085
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,4,128,1,float16,float16,127,0.018987199664115904
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,4,128,1,float16,fp8,127,0.01966080069541931
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,4,128,1,float16,float16,255,0.020083199441432952
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,4,128,1,float16,fp8,255,0.021376000344753267
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,4,128,1,float16,float16,511,0.023500800132751465
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,4,128,1,float16,fp8,511,0.02560800015926361
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,4,128,1,float16,float16,1023,0.023119999468326567
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,4,128,1,float16,fp8,1023,0.024167999625205994
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,4,128,1,float16,float16,2047,0.02321600019931793
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,4,128,1,float16,fp8,2047,0.024711999297142028
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,4,128,1,float16,float16,4095,0.024579200148582458
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,4,128,1,float16,fp8,4095,0.025567999482154845
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,4,128,1,float16,float16,8191,0.026284798979759216
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,4,128,1,float16,fp8,8191,0.026681599020957947
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,4,128,1,float16,float16,16383,0.029967999458312987
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,4,128,1,float16,fp8,16383,0.03043360114097595
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,8,128,1,float16,float16,1,0.019153599441051484
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,8,128,1,float16,fp8,1,0.019601599872112276
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,8,128,1,float16,float16,3,0.019115200638771056
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,8,128,1,float16,fp8,3,0.019827200472354888
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,8,128,1,float16,float16,7,0.01907840073108673
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,8,128,1,float16,fp8,7,0.019763199985027312
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,8,128,1,float16,float16,15,0.019092799723148347
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,8,128,1,float16,fp8,15,0.019599999487400054
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,8,128,1,float16,float16,31,0.019065600633621217
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,8,128,1,float16,fp8,31,0.019827200472354888
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,8,128,1,float16,float16,63,0.01921280026435852
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,8,128,1,float16,fp8,63,0.019811199605464937
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,8,128,1,float16,float16,127,0.019073599576950075
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,8,128,1,float16,fp8,127,0.019622400403022766
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,8,128,1,float16,float16,255,0.020528000593185425
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,8,128,1,float16,fp8,255,0.021639999747276307
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,8,128,1,float16,float16,511,0.023451200127601622
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,8,128,1,float16,fp8,511,0.02574560046195984
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,8,128,1,float16,float16,1023,0.023364800214767455
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,8,128,1,float16,fp8,1023,0.024425600469112397
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,8,128,1,float16,float16,2047,0.02367199957370758
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,8,128,1,float16,fp8,2047,0.02502560019493103
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,8,128,1,float16,float16,4095,0.02556479871273041
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,8,128,1,float16,fp8,4095,0.026012799143791197
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,8,128,1,float16,float16,8191,0.029124799370765685
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,8,128,1,float16,fp8,8191,0.029844799637794496
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,8,128,1,float16,float16,16383,0.04064159989356995
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,16,8,128,1,float16,fp8,16383,0.03613280057907105
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,1,128,1,float16,float16,1,0.017153599858283998
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,1,128,1,float16,fp8,1,0.018062399327754976
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,1,128,1,float16,float16,3,0.017003199458122252
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,1,128,1,float16,fp8,3,0.018078400194644927
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,1,128,1,float16,float16,7,0.017180800437927246
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,1,128,1,float16,fp8,7,0.01791519969701767
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,1,128,1,float16,float16,15,0.017272000014781953
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,1,128,1,float16,fp8,15,0.01828320026397705
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,1,128,1,float16,float16,31,0.01724960058927536
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,1,128,1,float16,fp8,31,0.017903999984264375
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,1,128,1,float16,float16,63,0.017129600048065186
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,1,128,1,float16,fp8,63,0.018105599284172057
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,1,128,1,float16,float16,127,0.01733600050210953
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,1,128,1,float16,fp8,127,0.01804800033569336
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,1,128,1,float16,float16,255,0.018743999302387238
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,1,128,1,float16,fp8,255,0.01995680034160614
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,1,128,1,float16,float16,511,0.02183839976787567
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,1,128,1,float16,fp8,511,0.023975999653339387
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,1,128,1,float16,float16,1023,0.021289600431919097
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,1,128,1,float16,fp8,1023,0.022651199996471406
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,1,128,1,float16,float16,2047,0.02175839990377426
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,1,128,1,float16,fp8,2047,0.023020799458026885
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,1,128,1,float16,float16,4095,0.022601599991321563
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,1,128,1,float16,fp8,4095,0.02364639937877655
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,1,128,1,float16,float16,8191,0.023691199719905853
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,1,128,1,float16,fp8,8191,0.025190401077270507
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,1,128,1,float16,float16,16383,0.02658880054950714
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,1,128,1,float16,fp8,16383,0.027324798703193664
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,2,128,1,float16,float16,1,0.0189968004822731
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,2,128,1,float16,fp8,1,0.019849599897861482
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,2,128,1,float16,float16,3,0.019142399728298187
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,2,128,1,float16,fp8,3,0.019763199985027312
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,2,128,1,float16,float16,7,0.01892479956150055
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,2,128,1,float16,fp8,7,0.01968960016965866
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,2,128,1,float16,float16,15,0.019182400405406953
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,2,128,1,float16,fp8,15,0.019483199715614317
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,2,128,1,float16,float16,31,0.018984000384807586
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,2,128,1,float16,fp8,31,0.01979839950799942
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,2,128,1,float16,float16,63,0.019044800102710722
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,2,128,1,float16,fp8,63,0.019752000272274018
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,2,128,1,float16,float16,127,0.019196799397468566
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,2,128,1,float16,fp8,127,0.019883200526237488
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,2,128,1,float16,fp8,1023,0.024736000597476958
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,2,128,1,float16,float16,255,0.020660799741744996
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,2,128,1,float16,fp8,255,0.021719999611377716
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,2,128,1,float16,float16,511,0.023393599689006804
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,2,128,1,float16,fp8,511,0.025545600056648254
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,2,128,1,float16,float16,1023,0.022945599257946016
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,2,128,1,float16,float16,2047,0.023414400219917298
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,2,128,1,float16,fp8,2047,0.024803200364112855
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,2,128,1,float16,float16,4095,0.024376000463962554
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,2,128,1,float16,fp8,4095,0.025281599164009093
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,2,128,1,float16,float16,8191,0.026412799954414368
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,2,128,1,float16,fp8,8191,0.027323201298713684
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,2,128,1,float16,float16,16383,0.03060159981250763
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,2,128,1,float16,fp8,16383,0.03080959916114807
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,4,128,1,float16,float16,1,0.018955199420452117
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,4,128,1,float16,fp8,1,0.019998399913311003
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,4,128,1,float16,float16,3,0.01892479956150055
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,4,128,1,float16,fp8,3,0.019921599328517912
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,4,128,1,float16,float16,7,0.018905599415302277
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,4,128,1,float16,fp8,7,0.02012320011854172
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,4,128,1,float16,float16,15,0.019075199961662292
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,4,128,1,float16,fp8,15,0.019908800721168518
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,4,128,1,float16,float16,31,0.018935999274253844
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,4,128,1,float16,fp8,31,0.020108799636363982
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,4,128,1,float16,float16,63,0.019096000492572783
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,4,128,1,float16,fp8,63,0.02001120001077652
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,4,128,1,float16,float16,127,0.019118399918079378
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,4,128,1,float16,fp8,127,0.019977599382400513
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,4,128,1,float16,float16,255,0.02051360011100769
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,4,128,1,float16,fp8,255,0.02194720059633255
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,4,128,1,float16,float16,511,0.023364800214767455
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,4,128,1,float16,float16,4095,0.025968000292778015
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,4,128,1,float16,fp8,511,0.026156800985336303
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,4,128,1,float16,float16,1023,0.023296000063419343
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,4,128,1,float16,fp8,1023,0.024609600007534028
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,4,128,1,float16,float16,2047,0.023796799778938293
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,4,128,1,float16,fp8,2047,0.025040000677108765
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,4,128,1,float16,fp8,4095,0.026128000020980834
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,4,128,1,float16,fp8,8191,0.030124801397323608
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,4,128,1,float16,float16,8191,0.029065600037574767
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,4,128,1,float16,float16,16383,0.04205600023269653
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,4,128,1,float16,fp8,16383,0.03591200113296509
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,8,128,1,float16,float16,1,0.019273599982261656
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,8,128,1,float16,fp8,1,0.019857600331306458
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,8,128,1,float16,float16,3,0.019300800561904908
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,8,128,1,float16,fp8,3,0.01971520036458969
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,8,128,1,float16,float16,7,0.019047999382019044
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,8,128,1,float16,fp8,7,0.01995680034160614
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,8,128,1,float16,float16,15,0.01930239945650101
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,8,128,1,float16,fp8,15,0.019896000623703003
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,8,128,1,float16,float16,31,0.01918399930000305
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,8,128,1,float16,fp8,31,0.019952000677585603
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,8,128,1,float16,float16,63,0.019312000274658202
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,8,128,1,float16,fp8,63,0.01993599981069565
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,8,128,1,float16,float16,127,0.01937599927186966
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,8,128,1,float16,fp8,127,0.020047999918460846
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,8,128,1,float16,float16,255,0.020670400559902193
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,8,128,1,float16,fp8,255,0.02186720073223114
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,8,128,1,float16,float16,511,0.023876799643039702
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,8,128,1,float16,fp8,511,0.02555040121078491
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,8,128,1,float16,float16,1023,0.023420800268650056
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,8,128,1,float16,fp8,1023,0.024723200500011443
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,8,128,1,float16,float16,2047,0.025319999456405638
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,8,128,1,float16,fp8,2047,0.0254831999540329
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,8,128,1,float16,float16,4095,0.029211199283599852
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,8,128,1,float16,fp8,4095,0.02956160008907318
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,8,128,1,float16,float16,8191,0.039843198657035825
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,8,128,1,float16,fp8,8191,0.035996800661087035
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,1,128,1,float16,float16,7,0.020001600682735442
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,8,128,1,float16,float16,16383,0.05650879740715027
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,16,8,128,1,float16,fp8,16383,0.05422400236129761
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,1,128,1,float16,float16,1,0.01991039961576462
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,1,128,1,float16,fp8,1,0.020871999859809875
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,1,128,1,float16,float16,3,0.019940799474716185
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,1,128,1,float16,fp8,3,0.020900799334049223
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,1,128,1,float16,fp8,7,0.020686399936676026
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,1,128,1,float16,float16,15,0.020265600085258482
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,1,128,1,float16,fp8,15,0.02084160000085831
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,1,128,1,float16,float16,31,0.019939200580120088
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,1,128,1,float16,fp8,31,0.020662400126457214
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,1,128,1,float16,float16,63,0.01992959976196289
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,1,128,1,float16,fp8,63,0.020715199410915375
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,1,128,1,float16,float16,127,0.02029920071363449
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,1,128,1,float16,fp8,127,0.02113119959831238
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,1,128,1,float16,float16,255,0.021879999339580535
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,1,128,1,float16,fp8,255,0.022697600722312927
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,1,128,1,float16,float16,511,0.02475520074367523
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,1,128,1,float16,fp8,511,0.026576000452041625
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,1,128,1,float16,float16,1023,0.026151999831199646
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,1,128,1,float16,fp8,1023,0.026310399174690247
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,1,128,1,float16,float16,2047,0.027884799242019653
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,1,128,1,float16,fp8,2047,0.03020159900188446
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,1,128,1,float16,float16,4095,0.04168800115585327
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,1,128,1,float16,fp8,4095,0.03614560067653656
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,1,128,1,float16,float16,8191,0.05796319842338562
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,1,128,1,float16,fp8,8191,0.05439199805259705
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,1,128,1,float16,float16,16383,0.09050559997558594
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,1,128,1,float16,fp8,16383,0.0805791974067688
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,2,128,1,float16,float16,1,0.020134399831295013
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,2,128,1,float16,fp8,1,0.02112479954957962
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,2,128,1,float16,float16,3,0.020329600572586058
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,2,128,1,float16,fp8,3,0.021195200085639954
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,2,128,1,float16,float16,7,0.020319999754428865
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,2,128,1,float16,fp8,7,0.021270400285720824
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,2,128,1,float16,float16,15,0.020467199385166168
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,2,128,1,float16,fp8,15,0.02130240052938461
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,2,128,1,float16,float16,31,0.020153599977493285
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,2,128,1,float16,fp8,31,0.02115200012922287
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,2,128,1,float16,float16,63,0.020131200551986694
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,2,128,1,float16,fp8,63,0.02098720073699951
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,2,128,1,float16,fp8,511,0.027143999934196472
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,2,128,1,float16,float16,127,0.02056480050086975
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,2,128,1,float16,fp8,127,0.021134400367736818
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,2,128,1,float16,float16,255,0.022203199565410614
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,2,128,1,float16,fp8,255,0.02316640019416809
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,2,128,1,float16,float16,511,0.025225600600242613
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,2,128,1,float16,float16,1023,0.028398400545120238
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,2,128,1,float16,fp8,1023,0.02995840013027191
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,2,128,1,float16,float16,2047,0.04129279851913452
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,2,128,1,float16,fp8,2047,0.03575679957866669
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,2,128,1,float16,float16,4095,0.05729759931564331
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,2,128,1,float16,fp8,4095,0.05474720001220703
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,4,128,1,float16,fp8,1,0.02141280025243759
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,2,128,1,float16,float16,8191,0.09045600295066833
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,2,128,1,float16,fp8,8191,0.0800432026386261
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,2,128,1,float16,float16,16383,0.15129120349884034
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,2,128,1,float16,fp8,16383,0.1324928045272827
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,4,128,1,float16,float16,1,0.020587199926376344
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,4,128,1,float16,fp8,15,0.02162880003452301
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,4,128,1,float16,float16,3,0.020772799849510193
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,4,128,1,float16,fp8,3,0.021459199488162994
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,4,128,1,float16,float16,7,0.020828799903392793
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,4,128,1,float16,fp8,7,0.021588799357414246
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,4,128,1,float16,float16,15,0.02067199945449829
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,4,128,1,float16,float16,31,0.02080159932374954
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,4,128,1,float16,fp8,31,0.021583999693393707
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,4,128,1,float16,float16,63,0.020660799741744996
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,4,128,1,float16,fp8,63,0.021427200734615327
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,4,128,1,float16,float16,127,0.020768000185489653
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,4,128,1,float16,fp8,127,0.021595199406147004
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,4,128,1,float16,float16,255,0.02242400050163269
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,4,128,1,float16,fp8,255,0.023601600527763368
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,4,128,1,float16,float16,511,0.025911998748779298
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,4,128,1,float16,fp8,511,0.027507200837135315
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,4,128,1,float16,float16,1023,0.04025599956512451
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,4,128,1,float16,fp8,1023,0.03514719903469086
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,4,128,1,float16,float16,2047,0.05635679960250854
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,4,128,1,float16,fp8,2047,0.053947198390960696
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,4,128,1,float16,float16,4095,0.08870400190353393
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,4,128,1,float16,fp8,4095,0.07973600029945374
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,4,128,1,float16,float16,8191,0.15054880380630492
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,4,128,1,float16,fp8,8191,0.13175840377807618
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,4,128,1,float16,float16,16383,0.2699343919754028
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,8,128,1,float16,float16,7,0.024695999920368195
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,8,128,1,float16,fp8,7,0.026151999831199646
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,8,128,1,float16,float16,1,0.024531200528144836
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,8,128,1,float16,fp8,1,0.025966399908065797
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,4,128,1,float16,fp8,16383,0.2295232057571411
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,8,128,1,float16,float16,3,0.024561600387096406
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,8,128,1,float16,fp8,3,0.026015999913215637
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,8,128,1,float16,fp8,15,0.02605760097503662
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,8,128,1,float16,float16,15,0.024348799884319306
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,8,128,1,float16,float16,31,0.02452320009469986
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,8,128,1,float16,fp8,31,0.026499199867248534
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,8,128,1,float16,float16,63,0.02449920028448105
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,8,128,1,float16,fp8,63,0.0263808012008667
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,8,128,1,float16,float16,127,0.02475520074367523
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,8,128,1,float16,fp8,127,0.02632960081100464
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,8,128,1,float16,float16,511,0.040092799067497256
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,8,128,1,float16,float16,255,0.028518399596214293
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,8,128,1,float16,fp8,255,0.03043839931488037
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,8,128,1,float16,fp8,511,0.03850559890270233
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,8,128,1,float16,float16,1023,0.058676797151565555
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,8,128,1,float16,fp8,1023,0.05885440111160278
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,8,128,1,float16,float16,2047,0.09038559794425964
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,8,128,1,float16,fp8,2047,0.08311840295791625
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,8,128,1,float16,float16,4095,0.153713595867157
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,8,128,1,float16,fp8,4095,0.13498079776763916
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,8,128,1,float16,float16,8191,0.2745232105255127
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,8,128,1,float16,float16,16383,0.5269472122192382
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,8,128,1,float16,fp8,8191,0.2348639965057373
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,1,128,1,float16,float16,1,0.018875199556350707
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,1,128,1,float16,fp8,1,0.019633600115776063
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,1,128,1,float16,float16,3,0.018782399594783783
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,16,8,128,1,float16,fp8,16383,0.42997918128967283
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,1,128,1,float16,fp8,3,0.019812799990177155
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,1,128,1,float16,float16,7,0.019075199961662292
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,1,128,1,float16,fp8,7,0.019870400428771973
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,1,128,1,float16,float16,15,0.018947200477123262
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,1,128,1,float16,fp8,15,0.019827200472354888
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,1,128,1,float16,float16,31,0.01900320053100586
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,1,128,1,float16,fp8,31,0.019864000380039215
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,1,128,1,float16,float16,63,0.018985599279403687
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,1,128,1,float16,fp8,63,0.019912000000476836
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,1,128,1,float16,float16,511,0.023636800050735474
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,1,128,1,float16,float16,127,0.019044800102710722
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,1,128,1,float16,fp8,127,0.019832000136375427
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,1,128,1,float16,float16,255,0.02050720006227493
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,1,128,1,float16,fp8,255,0.02192319929599762
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,1,128,1,float16,fp8,511,0.02547839879989624
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,1,128,1,float16,float16,1023,0.02306240051984787
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,1,128,1,float16,fp8,1023,0.024641600251197816
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,1,128,1,float16,float16,2047,0.02343360036611557
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,1,128,1,float16,fp8,2047,0.025102400779724122
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,1,128,1,float16,float16,4095,0.024769599735736846
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,1,128,1,float16,fp8,4095,0.02561599910259247
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,1,128,1,float16,float16,8191,0.026545599102973938
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,1,128,1,float16,fp8,8191,0.027024000883102417
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,1,128,1,float16,float16,16383,0.03049120008945465
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,1,128,1,float16,fp8,16383,0.03120799958705902
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,2,128,1,float16,float16,1,0.019054399430751802
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,2,128,1,float16,fp8,1,0.019815999269485473
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,2,128,1,float16,float16,3,0.018990400433540344
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,2,128,1,float16,fp8,3,0.019995200634002685
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,2,128,1,float16,float16,7,0.019105599820613862
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,2,128,1,float16,fp8,7,0.019900800287723543
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,2,128,1,float16,float16,15,0.018985599279403687
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,2,128,1,float16,fp8,15,0.019971199333667755
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,2,128,1,float16,float16,31,0.018984000384807586
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,2,128,1,float16,fp8,31,0.019875200092792512
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,2,128,1,float16,float16,63,0.018878400325775146
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,2,128,1,float16,fp8,63,0.020127999782562255
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,2,128,1,float16,float16,127,0.01907680034637451
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,2,128,1,float16,fp8,127,0.019968000054359437
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,2,128,1,float16,float16,255,0.02069920003414154
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,2,128,1,float16,fp8,255,0.021694399416446686
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,2,128,1,float16,float16,511,0.023707200586795808
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,2,128,1,float16,fp8,511,0.025464001297950744
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,2,128,1,float16,float16,1023,0.023332799971103668
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,2,128,1,float16,fp8,1023,0.02502079904079437
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,2,128,1,float16,float16,2047,0.024065600335597993
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,2,128,1,float16,fp8,2047,0.025308799743652344
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,2,128,1,float16,float16,4095,0.025968000292778015
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,2,128,1,float16,fp8,4095,0.026369601488113403
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,2,128,1,float16,float16,8191,0.02948960065841675
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,2,128,1,float16,fp8,8191,0.030206400156021117
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,2,128,1,float16,float16,16383,0.042099198698997496
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,2,128,1,float16,fp8,16383,0.036375999450683594
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,4,128,1,float16,float16,1,0.01929119974374771
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,4,128,1,float16,fp8,1,0.019916799664497376
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,4,128,1,float16,float16,3,0.01927199959754944
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,4,128,1,float16,fp8,3,0.0200080007314682
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,4,128,1,float16,float16,7,0.019105599820613862
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,4,128,1,float16,fp8,7,0.020006400346755982
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,4,128,1,float16,float16,15,0.019443200528621675
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,4,128,1,float16,fp8,15,0.020295999944210052
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,4,128,1,float16,float16,31,0.019054399430751802
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,4,128,1,float16,fp8,31,0.019870400428771973
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,4,128,1,float16,float16,63,0.01900160014629364
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,4,128,1,float16,fp8,63,0.019827200472354888
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,4,128,1,float16,float16,127,0.01921759992837906
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,4,128,1,float16,fp8,127,0.019980800151824952
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,4,128,1,float16,float16,255,0.02073120027780533
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,4,128,1,float16,fp8,255,0.02242400050163269
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,4,128,1,float16,float16,511,0.023892800509929656
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,4,128,1,float16,fp8,511,0.02556479871273041
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,4,128,1,float16,float16,1023,0.023588800430297853
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,4,128,1,float16,fp8,1023,0.024953599274158477
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,4,128,1,float16,float16,2047,0.025574401021003723
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,4,128,1,float16,fp8,2047,0.02569279968738556
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,8,128,1,float16,float16,1,0.019572800397872923
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,4,128,1,float16,float16,4095,0.028968000411987306
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,4,128,1,float16,fp8,4095,0.029948800802230835
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,4,128,1,float16,float16,8191,0.03996959924697876
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,4,128,1,float16,fp8,8191,0.03554719984531403
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,4,128,1,float16,float16,16383,0.05652160048484802
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,4,128,1,float16,fp8,16383,0.05424320101737976
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,8,128,1,float16,fp8,1,0.020286400616168977
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,8,128,1,float16,float16,3,0.019649599492549897
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,8,128,1,float16,fp8,3,0.020204800367355346
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,8,128,1,float16,float16,7,0.01921440064907074
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,8,128,1,float16,fp8,7,0.020131200551986694
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,8,128,1,float16,float16,15,0.01934240013360977
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,8,128,1,float16,fp8,15,0.020193600654602052
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,8,128,1,float16,float16,31,0.01942719966173172
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,8,128,1,float16,fp8,31,0.020259200036525725
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,8,128,1,float16,float16,63,0.019356800615787505
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,8,128,1,float16,fp8,63,0.02062560021877289
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,8,128,1,float16,float16,127,0.019444799423217772
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,8,128,1,float16,fp8,127,0.02024639993906021
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,8,128,1,float16,float16,255,0.020937600731849672
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,8,128,1,float16,fp8,255,0.022628800570964815
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,8,128,1,float16,float16,511,0.02422879934310913
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,8,128,1,float16,fp8,511,0.025761601328849793
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,8,128,1,float16,float16,1023,0.025492799282073975
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,8,128,1,float16,fp8,1023,0.02563520073890686
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,8,128,1,float16,float16,8191,0.055852800607681274
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,8,128,1,float16,float16,2047,0.027212798595428467
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,8,128,1,float16,float16,16383,0.08873599767684937
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,8,128,1,float16,fp8,2047,0.02946400046348572
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,8,128,1,float16,float16,4095,0.03987680077552795
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,8,128,1,float16,fp8,4095,0.035395199060440065
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,8,128,1,float16,fp8,8191,0.05392000079154968
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,16,8,128,1,float16,fp8,16383,0.07898880243301391
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,1,128,1,float16,float16,1,0.02316800057888031
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,1,128,1,float16,fp8,1,0.023579199612140656
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,1,128,1,float16,float16,3,0.023175999522209167
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,1,128,1,float16,fp8,3,0.023481599986553192
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,1,128,1,float16,float16,7,0.022971199452877046
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,1,128,1,float16,fp8,7,0.023515200614929198
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,1,128,1,float16,float16,15,0.022668799757957457
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,1,128,1,float16,fp8,15,0.02353599965572357
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,1,128,1,float16,float16,31,0.02274399995803833
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,1,128,1,float16,fp8,31,0.023647999763488768
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,1,128,1,float16,float16,63,0.0230880007147789
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,1,128,1,float16,fp8,63,0.02377759963274002
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,1,128,1,float16,float16,127,0.023158399760723113
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,1,128,1,float16,fp8,127,0.02391040027141571
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,1,128,1,float16,float16,255,0.025206398963928223
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,1,128,1,float16,fp8,255,0.026251199841499328
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,1,128,1,float16,float16,511,0.02805120050907135
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,1,128,1,float16,fp8,511,0.029627200961112977
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,1,128,1,float16,float16,1023,0.04364160001277924
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,1,128,1,float16,fp8,1023,0.03765439987182617
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,1,128,1,float16,float16,2047,0.05934720039367676
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,1,128,1,float16,fp8,2047,0.05616000294685364
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,1,128,1,float16,float16,4095,0.09250079989433288
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,1,128,1,float16,fp8,4095,0.08155999779701233
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,1,128,1,float16,float16,8191,0.15350240468978882
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,1,128,1,float16,fp8,8191,0.13334879875183106
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,2,128,1,float16,float16,1,0.027084800601005554
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,2,128,1,float16,fp8,1,0.028537601232528687
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,2,128,1,float16,float16,3,0.02677600085735321
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,2,128,1,float16,fp8,3,0.02818560004234314
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,2,128,1,float16,float16,7,0.026660799980163574
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,2,128,1,float16,fp8,7,0.02803199887275696
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,2,128,1,float16,float16,15,0.026649600267410277
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,2,128,1,float16,fp8,15,0.02824319899082184
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,2,128,1,float16,float16,31,0.027108800411224366
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,2,128,1,float16,fp8,31,0.028431999683380126
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,2,128,1,float16,float16,63,0.02707040011882782
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,2,128,1,float16,fp8,63,0.028347200155258177
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,2,128,1,float16,float16,127,0.026902401447296144
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,2,128,1,float16,float16,1023,0.06233440041542053
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,2,128,1,float16,fp8,127,0.028808000683784484
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,2,128,1,float16,float16,2047,0.09500160217285156
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,2,128,1,float16,float16,255,0.030100798606872557
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,2,128,1,float16,fp8,255,0.03256799876689911
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,2,128,1,float16,fp8,4095,0.13684480190277098
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,2,128,1,float16,float16,511,0.043968001008033754
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,2,128,1,float16,fp8,511,0.04063520133495331
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,2,128,1,float16,fp8,1023,0.0615119993686676
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,2,128,1,float16,fp8,2047,0.0852128028869629
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,2,128,1,float16,float16,4095,0.156385600566864
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,4,128,1,float16,float16,1,0.034185600280761716
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,2,128,1,float16,float16,8191,0.27709600925445554
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,2,128,1,float16,fp8,8191,0.23801279067993164
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,4,128,1,float16,fp8,1,0.03774400055408478
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,4,128,1,float16,fp8,15,0.03758080005645752
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,4,128,1,float16,float16,3,0.03440960049629212
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,4,128,1,float16,fp8,31,0.037729600071907045
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,4,128,1,float16,fp8,3,0.03747360110282898
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,4,128,1,float16,fp8,63,0.037492799758911136
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,4,128,1,float16,float16,7,0.034411200881004335
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,4,128,1,float16,fp8,7,0.03732320070266724
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,4,128,1,float16,float16,15,0.034385600686073305
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,4,128,1,float16,float16,31,0.03449119925498963
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,4,128,1,float16,float16,63,0.03478879928588867
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,4,128,1,float16,float16,127,0.03485920131206512
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,4,128,1,float16,fp8,127,0.037694400548934935
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,4,128,1,float16,float16,255,0.04962559938430786
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,4,128,1,float16,fp8,255,0.04627679884433746
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,4,128,1,float16,float16,511,0.0635424017906189
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,4,128,1,float16,fp8,511,0.06598079800605774
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,4,128,1,float16,float16,1023,0.09956160187721252
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,4,128,1,float16,fp8,4095,0.24354240894317628
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,4,128,1,float16,fp8,1023,0.09361119866371155
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,4,128,1,float16,float16,2047,0.158624005317688
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,4,128,1,float16,fp8,2047,0.14225120544433595
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,4,128,1,float16,float16,4095,0.2809887886047363
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,8,128,1,float16,float16,1,0.049358400702476504
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,8,128,1,float16,fp8,1,0.055339199304580686
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,4,128,1,float16,float16,8191,0.5356480121612549
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,8,128,1,float16,float16,3,0.04859040081501007
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,4,128,1,float16,fp8,8191,0.4416719913482666
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,8,128,1,float16,fp8,3,0.05574880242347717
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,8,128,1,float16,float16,7,0.048681598901748654
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,8,128,1,float16,fp8,7,0.0552079975605011
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,8,128,1,float16,float16,15,0.049236801266670224
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,8,128,1,float16,fp8,15,0.05681920051574707
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,8,128,1,float16,float16,31,0.04904319941997528
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,8,128,1,float16,fp8,31,0.055497598648071286
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,8,128,1,float16,float16,63,0.04930239915847778
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,8,128,1,float16,fp8,63,0.05539360046386719
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,8,128,1,float16,float16,127,0.05635039806365967
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,8,128,1,float16,fp8,127,0.05619040131568909
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,8,128,1,float16,float16,255,0.07290719747543335
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,8,128,1,float16,fp8,255,0.07732800245285035
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,8,128,1,float16,float16,511,0.10227839946746826
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,8,128,1,float16,fp8,511,0.10776480436325073
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,8,128,1,float16,float16,1023,0.1674288034439087
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,8,128,1,float16,fp8,1023,0.16060639619827272
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,8,128,1,float16,float16,2047,0.2898832082748413
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,8,128,1,float16,fp8,2047,0.2549040079116821
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,8,128,1,float16,float16,4095,0.5380608081817627
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,8,128,1,float16,fp8,4095,0.4516416072845459
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,1,128,1,float16,float16,1,0.029897600412368774
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,1,128,1,float16,fp8,1,0.031167998909950256
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,1,128,1,float16,float16,3,0.02980639934539795
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,1,128,1,float16,fp8,3,0.03151040077209473
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,1,128,1,float16,fp8,7,0.03128480017185211
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,1,128,1,float16,float16,7,0.029811200499534608
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,8,128,1,float16,float16,8191,1.0256943702697754
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,1,128,1,float16,fp8,15,0.031806400418281554
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,1,128,1,float16,float16,15,0.029471999406814574
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,16,8,128,1,float16,fp8,8191,0.8514191627502441
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,1,128,1,float16,float16,31,0.029976001381874083
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,1,128,1,float16,fp8,31,0.031718400120735166
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,1,128,1,float16,float16,63,0.02999520003795624
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,1,128,1,float16,fp8,63,0.03152639865875244
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,1,128,1,float16,fp8,127,0.03147999942302704
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,1,128,1,float16,float16,127,0.033327999711036685
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,1,128,1,float16,float16,255,0.03341119885444641
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,1,128,1,float16,fp8,255,0.0364111989736557
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,1,128,1,float16,float16,511,0.048721599578857425
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,1,128,1,float16,fp8,511,0.04437119960784912
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,1,128,1,float16,float16,1023,0.06721760034561157
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,1,128,1,float16,fp8,1023,0.06547840237617493
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,1,128,1,float16,float16,2047,0.09838560223579407
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,1,128,1,float16,fp8,2047,0.09009439945220947
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,2,128,1,float16,float16,1,0.03723680078983307
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,2,128,1,float16,fp8,1,0.04073280096054077
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,2,128,1,float16,float16,3,0.037243199348449704
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,2,128,1,float16,fp8,3,0.0406143993139267
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,2,128,1,float16,float16,7,0.03694080114364624
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,2,128,1,float16,fp8,7,0.04036639928817749
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,2,128,1,float16,float16,15,0.03747360110282898
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,2,128,1,float16,fp8,15,0.040433600544929504
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,2,128,1,float16,float16,31,0.0373744010925293
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,2,128,1,float16,fp8,31,0.040756800770759584
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,2,128,1,float16,float16,63,0.03743360042572021
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,2,128,1,float16,fp8,63,0.04054880142211914
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,2,128,1,float16,float16,127,0.03815680146217346
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,2,128,1,float16,fp8,127,0.040510401129722595
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,2,128,1,float16,float16,255,0.05333120226860046
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,2,128,1,float16,float16,2047,0.16398719549179078
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,2,128,1,float16,fp8,255,0.04903360009193421
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,2,128,1,float16,float16,511,0.06743040084838867
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,2,128,1,float16,fp8,511,0.07009599804878235
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,2,128,1,float16,float16,1023,0.10349440574645996
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,2,128,1,float16,fp8,1023,0.09723039865493774
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,2,128,1,float16,fp8,2047,0.14640640020370482
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,4,128,1,float16,float16,1,0.052121597528457644
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,4,128,1,float16,float16,3,0.05249119997024536
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,4,128,1,float16,fp8,1,0.05835840106010437
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,4,128,1,float16,fp8,3,0.05804160237312317
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,4,128,1,float16,float16,7,0.05230399966239929
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,4,128,1,float16,fp8,7,0.058246397972106935
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,4,128,1,float16,float16,15,0.05157600045204162
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,4,128,1,float16,float16,31,0.05207359790802002
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,4,128,1,float16,fp8,15,0.05802400112152099
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,4,128,1,float16,fp8,31,0.058713597059249875
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,4,128,1,float16,float16,63,0.05295199751853943
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,4,128,1,float16,fp8,63,0.058641600608825686
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,4,128,1,float16,float16,127,0.05968639850616455
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,4,128,1,float16,fp8,127,0.05925599932670593
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,4,128,1,float16,float16,255,0.07631199955940246
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,4,128,1,float16,fp8,255,0.08046079874038696
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,4,128,1,float16,float16,511,0.10648800134658813
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,4,128,1,float16,fp8,511,0.11153440475463867
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,4,128,1,float16,float16,1023,0.17197760343551635
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,4,128,1,float16,fp8,1023,0.1631343960762024
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,4,128,1,float16,float16,2047,0.2935904026031494
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,8,128,1,float16,float16,1,0.0803551971912384
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,8,128,1,float16,fp8,1,0.09423360228538513
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,4,128,1,float16,fp8,2047,0.2562848091125488
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,8,128,1,float16,float16,3,0.08059999942779542
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,8,128,1,float16,fp8,3,0.09395359754562378
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,8,128,1,float16,float16,7,0.08093119859695434
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,8,128,1,float16,fp8,7,0.09386079907417297
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,8,128,1,float16,float16,15,0.08084959983825683
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,8,128,1,float16,fp8,15,0.093641597032547
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,8,128,1,float16,float16,127,0.09003999829292297
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,8,128,1,float16,float16,31,0.08145279884338379
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,8,128,1,float16,fp8,31,0.09405440092086792
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,8,128,1,float16,float16,63,0.08840479850769042
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,8,128,1,float16,fp8,63,0.09587680101394654
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,8,128,1,float16,fp8,127,0.10436960458755493
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,8,128,1,float16,float16,255,0.11953920125961304
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,8,128,1,float16,fp8,255,0.13238240480422975
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,8,128,1,float16,float16,511,0.17904000282287597
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,8,128,1,float16,fp8,511,0.1932528018951416
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,8,128,1,float16,float16,1023,0.3088320016860962
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,8,128,1,float16,fp8,1023,0.2948064088821411
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,1,128,1,float16,float16,1,0.04427199959754944
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,1,128,1,float16,fp8,1,0.04715999960899353
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,8,128,1,float16,float16,2047,0.5557231903076172
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,1,128,1,float16,float16,3,0.044460800290107724
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,16,8,128,1,float16,fp8,2047,0.4739488124847412
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,1,128,1,float16,fp8,3,0.04712960124015808
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,1,128,1,float16,float16,7,0.04441919922828674
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,1,128,1,float16,fp8,7,0.047260800004005434
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,1,128,1,float16,float16,15,0.04445759952068329
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,1,128,1,float16,fp8,15,0.04772000014781952
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,1,128,1,float16,float16,31,0.044249600172042845
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,1,128,1,float16,fp8,31,0.0474047988653183
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,1,128,1,float16,float16,63,0.044433599710464476
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,1,128,1,float16,fp8,63,0.04766719937324524
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,1,128,1,float16,float16,127,0.04514079988002777
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,1,128,1,float16,fp8,127,0.04753119945526123
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,1,128,1,float16,float16,255,0.06063839793205261
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,1,128,1,float16,fp8,255,0.05629439949989319
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,1,128,1,float16,float16,511,0.07620480060577392
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,1,128,1,float16,fp8,511,0.07648159861564637
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,1,128,1,float16,float16,1023,0.11274240016937256
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,1,128,1,float16,fp8,1023,0.1059999942779541
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,2,128,1,float16,float16,1,0.05871679782867432
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,2,128,1,float16,fp8,1,0.06450080275535583
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,2,128,1,float16,float16,3,0.058852797746658324
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,2,128,1,float16,fp8,3,0.06477919816970826
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,2,128,1,float16,float16,7,0.05862399935722351
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,2,128,1,float16,fp8,7,0.06493759751319886
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,2,128,1,float16,float16,15,0.058427202701568606
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,2,128,1,float16,fp8,15,0.06461920142173767
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,2,128,1,float16,float16,31,0.05860480070114136
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,2,128,1,float16,fp8,31,0.06466079950332641
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,2,128,1,float16,float16,63,0.06114240288734436
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,2,128,1,float16,fp8,63,0.06509280204772949
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,2,128,1,float16,float16,127,0.0671280026435852
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,2,128,1,float16,fp8,127,0.0669103980064392
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,2,128,1,float16,float16,255,0.08393120169639587
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,2,128,1,float16,fp8,255,0.0877664029598236
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,4,128,1,float16,fp8,1,0.10054080486297608
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,2,128,1,float16,float16,511,0.11327199935913086
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,2,128,1,float16,fp8,511,0.11709439754486084
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,2,128,1,float16,float16,1023,0.18102240562438965
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,4,128,1,float16,fp8,7,0.09972159862518311
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,2,128,1,float16,fp8,1023,0.17221920490264891
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,4,128,1,float16,fp8,15,0.10033279657363892
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,4,128,1,float16,float16,1,0.08739839792251587
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,4,128,1,float16,float16,3,0.08742880225181579
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,4,128,1,float16,fp8,3,0.09981759786605834
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,4,128,1,float16,float16,7,0.08735039830207825
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,4,128,1,float16,float16,15,0.0872223973274231
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,4,128,1,float16,float16,31,0.08756800293922425
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,4,128,1,float16,fp8,31,0.10029120445251465
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,4,128,1,float16,fp8,255,0.13880000114440919
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,4,128,1,float16,float16,63,0.09431679844856262
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,4,128,1,float16,fp8,63,0.10103199481964112
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,4,128,1,float16,float16,127,0.09616159796714782
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,4,128,1,float16,fp8,127,0.11037440299987793
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,4,128,1,float16,float16,255,0.1280336022377014
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,4,128,1,float16,float16,511,0.18433279991149903
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,4,128,1,float16,fp8,511,0.19957120418548585
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,4,128,1,float16,float16,1023,0.31448318958282473
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,4,128,1,float16,fp8,1023,0.2997600078582764
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,8,128,1,float16,float16,1,0.1411072015762329
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,8,128,1,float16,fp8,1,0.16908799409866332
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,8,128,1,float16,float16,3,0.14268800020217895
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,8,128,1,float16,fp8,3,0.168995201587677
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,8,128,1,float16,float16,7,0.1421023964881897
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,8,128,1,float16,fp8,7,0.17005759477615356
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,8,128,1,float16,float16,15,0.14245760440826416
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,8,128,1,float16,fp8,15,0.1703104019165039
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,8,128,1,float16,float16,31,0.14799519777297973
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,8,128,1,float16,fp8,31,0.16972320079803466
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,8,128,1,float16,float16,63,0.1544000029563904
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,8,128,1,float16,fp8,63,0.18271360397338868
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,8,128,1,float16,float16,127,0.1577296018600464
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,8,128,1,float16,fp8,127,0.18570879697799683
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,8,128,1,float16,float16,255,0.21495358943939208
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,8,128,1,float16,fp8,255,0.24124159812927246
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,8,128,1,float16,float16,511,0.32699999809265134
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,8,128,1,float16,fp8,511,0.3590384006500244
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,1,128,1,float16,float16,1,0.019307200610637665
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,1,128,1,float16,fp8,1,0.020150400698184967
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,8,128,1,float16,float16,1023,0.5854015827178956
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,1,128,1,float16,float16,3,0.019350400567054747
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,16,8,128,1,float16,fp8,1023,0.5565199851989746
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,1,128,1,float16,fp8,3,0.02017119973897934
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,1,128,1,float16,float16,7,0.019361600279808044
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,1,128,1,float16,fp8,31,0.02024639993906021
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,1,128,1,float16,fp8,7,0.020190399885177613
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,1,128,1,float16,float16,15,0.01937440037727356
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,1,128,1,float16,fp8,15,0.02011519968509674
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,1,128,1,float16,float16,31,0.01928640007972717
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,1,128,1,float16,float16,63,0.01918399930000305
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,1,128,1,float16,fp8,63,0.020147199928760528
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,1,128,1,float16,float16,127,0.0191103994846344
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,1,128,1,float16,fp8,127,0.019857600331306458
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,1,128,1,float16,float16,255,0.021052800118923187
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,1,128,1,float16,fp8,255,0.022043199837207796
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,1,128,1,float16,float16,511,0.023953600227832793
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,1,128,1,float16,fp8,511,0.025872001051902772
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,1,128,1,float16,float16,1023,0.023447999358177186
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,1,128,1,float16,fp8,1023,0.025014400482177734
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,1,128,1,float16,float16,2047,0.024030399322509766
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,1,128,1,float16,fp8,2047,0.02550399899482727
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,1,128,1,float16,float16,4095,0.026345598697662353
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,1,128,1,float16,fp8,4095,0.026686400175094604
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,1,128,1,float16,float16,8191,0.02963840067386627
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,1,128,1,float16,fp8,8191,0.030623999238014222
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,1,128,1,float16,float16,16383,0.04276480078697205
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,1,128,1,float16,fp8,16383,0.03665440082550049
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,2,128,1,float16,float16,1,0.01927199959754944
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,2,128,1,float16,fp8,1,0.019971199333667755
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,2,128,1,float16,float16,3,0.01956000030040741
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,2,128,1,float16,fp8,3,0.01993120014667511
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,2,128,1,float16,float16,7,0.019305600225925444
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,2,128,1,float16,fp8,7,0.020155200362205507
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,2,128,1,float16,float16,15,0.019472000002861024
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,2,128,1,float16,fp8,15,0.020257599651813507
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,2,128,1,float16,float16,31,0.019566400349140166
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,2,128,1,float16,fp8,31,0.020078399777412416
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,2,128,1,float16,float16,63,0.01913599967956543
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,2,128,1,float16,fp8,63,0.02014880031347275
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,2,128,1,float16,float16,127,0.01927199959754944
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,2,128,1,float16,fp8,127,0.020132799446582795
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,2,128,1,float16,float16,255,0.0209647998213768
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,2,128,1,float16,fp8,255,0.02200160026550293
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,2,128,1,float16,float16,511,0.02398719936609268
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,2,128,1,float16,fp8,511,0.02568320035934448
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,2,128,1,float16,float16,1023,0.023809599876403808
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,2,128,1,float16,fp8,1023,0.02502720057964325
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,2,128,1,float16,float16,2047,0.02561439871788025
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,2,128,1,float16,fp8,2047,0.02587360143661499
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,2,128,1,float16,float16,4095,0.029182401299476624
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,2,128,1,float16,fp8,4095,0.02975519895553589
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,2,128,1,float16,float16,8191,0.04101920127868652
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,2,128,1,float16,fp8,8191,0.03590399920940399
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,2,128,1,float16,float16,16383,0.056676799058914186
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,2,128,1,float16,fp8,16383,0.05333600044250488
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,4,128,1,float16,float16,1,0.019280000030994414
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,4,128,1,float16,fp8,1,0.020467199385166168
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,4,128,1,float16,float16,3,0.01961279958486557
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,4,128,1,float16,fp8,3,0.0204927995800972
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,4,128,1,float16,float16,7,0.01958719938993454
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,4,128,1,float16,fp8,7,0.020584000647068022
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,4,128,1,float16,float16,15,0.01967200040817261
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,4,128,1,float16,fp8,15,0.020294399559497835
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,4,128,1,float16,float16,31,0.019655999541282655
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,4,128,1,float16,fp8,31,0.020425599813461304
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,4,128,1,float16,float16,63,0.019409599900245666
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,4,128,1,float16,fp8,63,0.020393599569797517
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,4,128,1,float16,float16,127,0.019534400105476378
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,4,128,1,float16,fp8,127,0.02014240026473999
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,4,128,1,float16,float16,255,0.021107199788093566
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,4,128,1,float16,fp8,255,0.022540800273418427
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,4,128,1,float16,float16,511,0.024344000220298766
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,4,128,1,float16,fp8,511,0.02630879878997803
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,4,128,1,float16,float16,1023,0.025393599271774293
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,4,128,1,float16,fp8,1023,0.02568959891796112
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,4,128,1,float16,float16,2047,0.02741119861602783
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,4,128,1,float16,fp8,2047,0.02945919930934906
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,4,128,1,float16,float16,4095,0.0404559999704361
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,4,128,1,float16,fp8,4095,0.03545120060443878
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,4,128,1,float16,float16,8191,0.05596479773521423
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,4,128,1,float16,fp8,8191,0.05392320156097412
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,4,128,1,float16,float16,16383,0.08888959884643555
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,4,128,1,float16,fp8,16383,0.07922559976577759
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,8,128,1,float16,float16,1,0.019833600521087645
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,8,128,1,float16,fp8,1,0.02051199972629547
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,8,128,1,float16,float16,3,0.01989919990301132
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,8,128,1,float16,fp8,3,0.020715199410915375
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,8,128,1,float16,float16,7,0.019750399887561797
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,8,128,1,float16,fp8,7,0.020681600272655486
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,8,128,1,float16,float16,15,0.019761599600315094
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,8,128,1,float16,fp8,15,0.02077440023422241
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,8,128,1,float16,float16,31,0.02011840045452118
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,8,128,1,float16,fp8,31,0.0207056000828743
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,8,128,1,float16,float16,63,0.01977919936180115
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,8,128,1,float16,fp8,63,0.020815999805927278
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,8,128,1,float16,float16,127,0.019702400267124175
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,8,128,1,float16,fp8,127,0.020766399800777435
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,8,128,1,float16,float16,255,0.021804800629615782
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,8,128,1,float16,fp8,255,0.022625599801540375
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,8,128,1,float16,float16,511,0.02484800070524216
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,8,128,1,float16,fp8,511,0.026467201113700867
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,8,128,1,float16,float16,1023,0.027820798754692077
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,8,128,1,float16,float16,8191,0.08955519795417785
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,8,128,1,float16,fp8,1023,0.02937760055065155
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,8,128,1,float16,float16,2047,0.04023520052433014
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,8,128,1,float16,fp8,2047,0.03559040129184723
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,8,128,1,float16,float16,4095,0.05644000172615051
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,8,128,1,float16,fp8,4095,0.05435839891433716
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,8,128,1,float16,fp8,8191,0.07947840094566345
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,8,128,1,float16,float16,16383,0.15011839866638182
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,16,8,128,1,float16,fp8,16383,0.1310464024543762
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,1,128,1,float16,float16,1,0.06583359837532043
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,1,128,1,float16,fp8,1,0.07123680114746093
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,1,128,1,float16,float16,3,0.06575199961662292
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,1,128,1,float16,fp8,3,0.07141119837760926
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,1,128,1,float16,float16,7,0.0656336009502411
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,1,128,1,float16,fp8,7,0.0715503990650177
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,1,128,1,float16,float16,63,0.06883839964866638
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,1,128,1,float16,float16,15,0.0654752016067505
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,1,128,1,float16,fp8,15,0.07187039852142334
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,1,128,1,float16,float16,31,0.06524479985237122
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,1,128,1,float16,fp8,31,0.07148159742355346
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,1,128,1,float16,fp8,63,0.0716048002243042
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,1,128,1,float16,float16,511,0.12265440225601196
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,1,128,1,float16,float16,127,0.07519840002059937
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,1,128,1,float16,fp8,127,0.07447680234909057
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,2,128,1,float16,fp8,1,0.10586559772491455
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,1,128,1,float16,float16,255,0.09137279987335205
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,1,128,1,float16,fp8,255,0.09435039758682251
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,1,128,1,float16,fp8,511,0.12661279439926149
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,2,128,1,float16,float16,1,0.09453920125961304
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,2,128,1,float16,float16,3,0.09447839856147766
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,2,128,1,float16,fp8,3,0.10592800378799438
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,2,128,1,float16,float16,7,0.09418720006942749
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,2,128,1,float16,fp8,7,0.10582400560379028
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,2,128,1,float16,float16,15,0.09462720155715942
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,2,128,1,float16,fp8,15,0.10616960525512695
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,2,128,1,float16,float16,31,0.09478240013122559
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,2,128,1,float16,fp8,31,0.1066815972328186
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,2,128,1,float16,fp8,63,0.10911999940872193
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,2,128,1,float16,float16,63,0.10201120376586914
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,2,128,1,float16,float16,127,0.10491520166397095
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,2,128,1,float16,fp8,127,0.11717920303344727
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,2,128,1,float16,float16,255,0.13903039693832397
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,2,128,1,float16,fp8,255,0.1444368004798889
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,2,128,1,float16,float16,511,0.19414880275726318
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,2,128,1,float16,fp8,511,0.20753118991851807
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,4,128,1,float16,float16,1,0.15081440210342406
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,4,128,1,float16,fp8,1,0.175654399394989
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,4,128,1,float16,float16,3,0.15032479763031006
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,4,128,1,float16,fp8,3,0.174399995803833
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,4,128,1,float16,float16,7,0.1510800004005432
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,4,128,1,float16,fp8,7,0.17543519735336305
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,4,128,1,float16,float16,15,0.1506832003593445
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,4,128,1,float16,fp8,15,0.17547199726104737
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,4,128,1,float16,float16,31,0.15671839714050292
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,4,128,1,float16,fp8,31,0.17665280103683473
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,4,128,1,float16,float16,63,0.15903520584106445
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,4,128,1,float16,fp8,63,0.1874624013900757
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,4,128,1,float16,float16,511,0.33443360328674315
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,4,128,1,float16,float16,127,0.16364959478378296
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,4,128,1,float16,fp8,127,0.19153759479522706
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,4,128,1,float16,float16,255,0.22210400104522704
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,4,128,1,float16,fp8,255,0.24542880058288574
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,8,128,1,float16,float16,1,0.2589936017990112
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,4,128,1,float16,fp8,511,0.3683167934417725
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,8,128,1,float16,fp8,1,0.31041760444641114
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,8,128,1,float16,float16,3,0.26074559688568116
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,8,128,1,float16,fp8,3,0.3123823881149292
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,8,128,1,float16,float16,7,0.2602720022201538
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,8,128,1,float16,fp8,7,0.31186718940734864
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,8,128,1,float16,float16,15,0.266211199760437
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,8,128,1,float16,fp8,15,0.3119456052780151
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,8,128,1,float16,float16,31,0.2784111976623535
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,8,128,1,float16,fp8,31,0.32636640071868894
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,8,128,1,float16,float16,63,0.2819119930267334
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,8,128,1,float16,fp8,63,0.33848159313201903
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,8,128,1,float16,float16,127,0.28988161087036135
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,8,128,1,float16,fp8,127,0.34186079502105715
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,8,128,1,float16,float16,255,0.4004784107208252
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,8,128,1,float16,fp8,255,0.44784159660339357
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,1,128,1,float16,float16,1,0.10651199817657471
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,1,128,1,float16,fp8,1,0.118995201587677
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,8,128,1,float16,float16,511,0.6279808044433594
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,1,128,1,float16,float16,3,0.10586719512939453
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,16,8,128,1,float16,fp8,511,0.6803855895996094
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,1,128,1,float16,fp8,3,0.11782879829406738
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,1,128,1,float16,fp8,31,0.11821279525756836
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,1,128,1,float16,float16,7,0.10654079914093018
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,1,128,1,float16,fp8,63,0.12623679637908936
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,1,128,1,float16,fp8,7,0.11798720359802246
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,1,128,1,float16,float16,15,0.106387197971344
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,1,128,1,float16,fp8,15,0.11895040273666382
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,1,128,1,float16,float16,31,0.10920480489730836
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,1,128,1,float16,float16,63,0.11592799425125122
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,1,128,1,float16,float16,127,0.11987999677658082
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,1,128,1,float16,fp8,127,0.1299407958984375
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,1,128,1,float16,float16,255,0.14926400184631347
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,1,128,1,float16,fp8,255,0.15715839862823486
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,2,128,1,float16,float16,1,0.1620944023132324
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,2,128,1,float16,fp8,1,0.18508479595184327
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,2,128,1,float16,float16,3,0.16347039937973024
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,2,128,1,float16,fp8,3,0.18387999534606933
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,2,128,1,float16,float16,7,0.16141920089721679
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,2,128,1,float16,fp8,7,0.1852128028869629
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,2,128,1,float16,float16,15,0.16235519647598268
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,2,128,1,float16,fp8,15,0.18444639444351196
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,2,128,1,float16,float16,31,0.16946719884872435
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,2,128,1,float16,fp8,31,0.18785120248794557
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,2,128,1,float16,float16,63,0.17201440334320067
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,2,128,1,float16,fp8,63,0.19848480224609374
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,2,128,1,float16,float16,127,0.17792799472808837
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,2,128,1,float16,fp8,127,0.201580810546875
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,2,128,1,float16,float16,255,0.2385983943939209
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,2,128,1,float16,fp8,255,0.25716960430145264
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,4,128,1,float16,float16,1,0.2793152093887329
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,4,128,1,float16,float16,7,0.27691519260406494
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,4,128,1,float16,fp8,1,0.3224240064620972
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,4,128,1,float16,float16,3,0.27734079360961916
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,4,128,1,float16,fp8,15,0.33020639419555664
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,4,128,1,float16,fp8,3,0.3252880096435547
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,4,128,1,float16,fp8,31,0.340884804725647
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,4,128,1,float16,fp8,7,0.3235167980194092
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,4,128,1,float16,float16,15,0.28210399150848386
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,4,128,1,float16,float16,31,0.28809120655059817
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,4,128,1,float16,float16,63,0.29090080261230467
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,4,128,1,float16,fp8,63,0.3507071971893311
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,4,128,1,float16,float16,127,0.2970848083496094
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,4,128,1,float16,fp8,255,0.4577487945556641
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,4,128,1,float16,fp8,127,0.34999840259552
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,4,128,1,float16,float16,255,0.4046304225921631
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,8,128,1,float16,float16,1,0.49724640846252444
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,8,128,1,float16,fp8,1,0.5919551849365234
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,8,128,1,float16,float16,3,0.5020095825195312
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,8,128,1,float16,fp8,3,0.5925360202789307
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,8,128,1,float16,float16,7,0.5097695827484131
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,8,128,1,float16,fp8,7,0.5905248165130615
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,8,128,1,float16,float16,15,0.5157567977905273
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,8,128,1,float16,float16,31,0.5211935997009277
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,8,128,1,float16,fp8,15,0.6090479850769043
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,8,128,1,float16,fp8,31,0.6362063884735107
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,8,128,1,float16,float16,63,0.5287407875061035
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,8,128,1,float16,fp8,63,0.6380496025085449
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,8,128,1,float16,float16,127,0.5431488037109375
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,1,128,1,float16,float16,1,0.01940480023622513
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,8,128,1,float16,fp8,127,0.6402224063873291
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,1,128,1,float16,fp8,1,0.020073600113391876
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,1,128,1,float16,float16,3,0.01960480064153671
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,8,128,1,float16,float16,255,0.7597856044769287
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,1,128,1,float16,fp8,3,0.02017119973897934
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,16,8,128,1,float16,fp8,255,0.8587039947509766
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,1,128,1,float16,float16,7,0.019734400510787963
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,1,128,1,float16,fp8,7,0.02011200040578842
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,1,128,1,float16,fp8,15,0.020095999538898467
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,1,128,1,float16,float16,15,0.01966720074415207
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,1,128,1,float16,float16,31,0.019380800426006317
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,1,128,1,float16,fp8,31,0.02012320011854172
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,1,128,1,float16,float16,63,0.01961439996957779
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,1,128,1,float16,fp8,63,0.019995200634002685
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,1,128,1,float16,float16,127,0.019443200528621675
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,1,128,1,float16,fp8,127,0.020388799905776977
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,1,128,1,float16,float16,255,0.021073600649833678
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,1,128,1,float16,fp8,255,0.022191999852657317
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,1,128,1,float16,float16,511,0.02409600019454956
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,1,128,1,float16,fp8,511,0.025982400774955748
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,1,128,1,float16,float16,1023,0.024014399945735933
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,1,128,1,float16,fp8,1023,0.02497439980506897
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,1,128,1,float16,float16,2047,0.02574400007724762
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,1,128,1,float16,fp8,2047,0.026052799820899964
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,1,128,1,float16,float16,4095,0.029262399673461913
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,1,128,1,float16,fp8,4095,0.029631999135017396
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,1,128,1,float16,float16,8191,0.041412800550460815
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,1,128,1,float16,fp8,8191,0.035806399583816526
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,1,128,1,float16,float16,16383,0.05736320018768311
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,1,128,1,float16,fp8,16383,0.05457760095596313
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,2,128,1,float16,float16,1,0.019548800587654114
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,2,128,1,float16,fp8,1,0.02061759978532791
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,2,128,1,float16,float16,3,0.01971839964389801
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,2,128,1,float16,fp8,3,0.020278400182723998
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,2,128,1,float16,float16,7,0.01966080069541931
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,2,128,1,float16,fp8,7,0.02053920030593872
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,2,128,1,float16,float16,15,0.019659200310707094
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,2,128,1,float16,fp8,15,0.020316800475120543
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,2,128,1,float16,float16,31,0.019524799287319185
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,2,128,1,float16,fp8,31,0.020588800311088562
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,2,128,1,float16,float16,63,0.019655999541282655
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,2,128,1,float16,fp8,63,0.020768000185489653
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,2,128,1,float16,float16,127,0.019659200310707094
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,2,128,1,float16,fp8,127,0.02051360011100769
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,2,128,1,float16,float16,255,0.021211199462413788
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,2,128,1,float16,fp8,255,0.022686399519443512
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,2,128,1,float16,float16,511,0.02436320036649704
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,2,128,1,float16,fp8,511,0.02658880054950714
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,2,128,1,float16,float16,1023,0.02563199996948242
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,2,128,1,float16,fp8,1023,0.02603999972343445
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,2,128,1,float16,float16,2047,0.027811199426651
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,2,128,1,float16,fp8,2047,0.029246398806571962
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,2,128,1,float16,float16,4095,0.040811198949813846
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,2,128,1,float16,fp8,4095,0.0355679988861084
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,2,128,1,float16,float16,8191,0.05686079859733582
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,2,128,1,float16,fp8,8191,0.05427039861679077
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,2,128,1,float16,float16,16383,0.08922079801559449
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,2,128,1,float16,fp8,16383,0.07900320291519165
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,4,128,1,float16,float16,1,0.01993280053138733
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,4,128,1,float16,fp8,1,0.020585599541664123
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,4,128,1,float16,float16,3,0.019753600656986236
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,4,128,1,float16,fp8,3,0.02073120027780533
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,4,128,1,float16,float16,7,0.01995680034160614
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,4,128,1,float16,fp8,7,0.020609599351882935
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,4,128,1,float16,float16,15,0.019923199713230134
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,4,128,1,float16,fp8,15,0.020534400641918183
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,4,128,1,float16,float16,31,0.0198512002825737
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,4,128,1,float16,float16,255,0.021916800737380983
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,4,128,1,float16,fp8,31,0.02077440023422241
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,4,128,1,float16,float16,63,0.019860799610614776
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,4,128,1,float16,fp8,63,0.020526400208473204
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,4,128,1,float16,float16,127,0.020027199387550355
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,4,128,1,float16,fp8,127,0.02125599980354309
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,4,128,1,float16,fp8,255,0.02292799949645996
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,4,128,1,float16,float16,511,0.024883200228214265
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,4,128,1,float16,fp8,511,0.026836800575256347
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,4,128,1,float16,float16,1023,0.027632001042366027
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,4,128,1,float16,fp8,1023,0.02935200035572052
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,4,128,1,float16,float16,2047,0.04095200002193451
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,4,128,1,float16,fp8,2047,0.035867199301719666
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,4,128,1,float16,float16,4095,0.057092797756195066
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,4,128,1,float16,fp8,4095,0.05471519827842712
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,4,128,1,float16,float16,8191,0.08982880115509033
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,4,128,1,float16,fp8,8191,0.07935839891433716
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,4,128,1,float16,float16,16383,0.15106240510940552
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,8,128,1,float16,float16,7,0.020216000080108643
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,8,128,1,float16,float16,1,0.020398400723934174
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,8,128,1,float16,float16,15,0.020209600031375886
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,4,128,1,float16,fp8,16383,0.13110560178756714
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,8,128,1,float16,fp8,1,0.021190400421619415
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,8,128,1,float16,float16,3,0.020127999782562255
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,8,128,1,float16,fp8,3,0.021055999398231506
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,8,128,1,float16,fp8,7,0.02101760059595108
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,8,128,1,float16,fp8,15,0.021180799603462218
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,8,128,1,float16,float16,31,0.020336000621318816
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,8,128,1,float16,fp8,31,0.021055999398231506
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,8,128,1,float16,float16,63,0.020127999782562255
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,8,128,1,float16,fp8,63,0.02125760018825531
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,8,128,1,float16,float16,127,0.020452800393104553
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,8,128,1,float16,fp8,127,0.021081599593162536
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,8,128,1,float16,float16,255,0.022380800545215608
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,8,128,1,float16,fp8,255,0.02316479980945587
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,8,128,1,float16,float16,511,0.025464001297950744
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,8,128,1,float16,fp8,511,0.027356800436973572
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,8,128,1,float16,float16,1023,0.03967519998550415
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,8,128,1,float16,fp8,1023,0.03486559987068176
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,8,128,1,float16,float16,2047,0.05504639744758606
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,8,128,1,float16,fp8,2047,0.05391200184822083
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,8,128,1,float16,float16,4095,0.08859360218048096
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,8,128,1,float16,fp8,4095,0.07960320115089417
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,8,128,1,float16,float16,8191,0.14844800233840943
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,8,128,1,float16,fp8,8191,0.13140000104904176
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,8,128,1,float16,float16,16383,0.26916959285736086
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,1,128,1,float16,float16,1,0.021089600026607515
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,1,128,1,float16,fp8,1,0.021836799383163453
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,1,128,1,float16,float16,15,0.021188800036907197
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,16,8,128,1,float16,fp8,16383,0.22842719554901122
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,1,128,1,float16,float16,3,0.021331200003623964
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,1,128,1,float16,fp8,3,0.021775999665260316
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,1,128,1,float16,float16,7,0.021076799929142
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,1,128,1,float16,fp8,7,0.021852800250053407
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,1,128,1,float16,fp8,15,0.02189279943704605
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,1,128,1,float16,float16,31,0.02096959948539734
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,1,128,1,float16,fp8,31,0.021840000152587892
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,1,128,1,float16,float16,63,0.020751999318599702
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,1,128,1,float16,fp8,63,0.022251200675964356
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,1,128,1,float16,float16,127,0.021299199759960176
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,1,128,1,float16,fp8,127,0.02218559980392456
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,1,128,1,float16,float16,255,0.022889600694179536
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,1,128,1,float16,fp8,255,0.02385119944810867
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,1,128,1,float16,float16,511,0.025772801041603087
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,1,128,1,float16,fp8,511,0.027612799406051637
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,1,128,1,float16,float16,1023,0.029097598791122437
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,1,128,1,float16,float16,8191,0.09190080165863038
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,1,128,1,float16,fp8,1023,0.03052479922771454
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,1,128,1,float16,float16,2047,0.04255839884281158
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,1,128,1,float16,fp8,2047,0.03674240112304687
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,1,128,1,float16,float16,4095,0.05793600082397461
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,1,128,1,float16,fp8,4095,0.05508319735527038
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,1,128,1,float16,fp8,32767,0.2323215961456299
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,1,128,1,float16,fp8,8191,0.0804975986480713
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,1,128,1,float16,float16,16383,0.15281120538711548
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,1,128,1,float16,fp8,16383,0.13367999792099
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,1,128,1,float16,float16,32767,0.27423360347747805
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,2,128,1,float16,fp8,7,0.02226240038871765
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,2,128,1,float16,float16,1,0.02122880071401596
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,2,128,1,float16,fp8,1,0.022339199483394623
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,2,128,1,float16,float16,3,0.021531200408935545
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,2,128,1,float16,fp8,3,0.022038400173187256
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,2,128,1,float16,float16,7,0.021371200680732727
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,2,128,1,float16,float16,15,0.021220800280570985
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,2,128,1,float16,fp8,15,0.022276799380779266
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,2,128,1,float16,float16,31,0.021342399716377258
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,2,128,1,float16,fp8,31,0.022148799896240235
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,2,128,1,float16,float16,63,0.02157599925994873
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,2,128,1,float16,fp8,63,0.02221280038356781
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,2,128,1,float16,float16,127,0.021692800521850585
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,2,128,1,float16,fp8,127,0.022313599288463593
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,2,128,1,float16,float16,255,0.023292799293994904
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,2,128,1,float16,fp8,255,0.024395200610160827
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,2,128,1,float16,float16,511,0.026444798707962035
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,2,128,1,float16,fp8,511,0.028115200996398925
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,2,128,1,float16,float16,1023,0.04100640118122101
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,2,128,1,float16,fp8,1023,0.03588959872722626
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,2,128,1,float16,float16,2047,0.056831997632980344
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,2,128,1,float16,fp8,2047,0.05484480261802673
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,2,128,1,float16,float16,4095,0.08986080288887024
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,2,128,1,float16,fp8,4095,0.07989119887351989
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,2,128,1,float16,float16,8191,0.14980000257492065
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,2,128,1,float16,fp8,8191,0.13231840133666992
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,2,128,1,float16,float16,16383,0.2729856014251709
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,2,128,1,float16,fp8,16383,0.22949440479278566
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,4,128,1,float16,float16,1,0.02520799934864044
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,2,128,1,float16,float16,32767,0.5218719959259033
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,4,128,1,float16,fp8,7,0.026763200759887695
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,4,128,1,float16,float16,7,0.025278401374816895
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,4,128,1,float16,fp8,1,0.02688960134983063
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,4,128,1,float16,float16,3,0.02544800043106079
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,2,128,1,float16,fp8,32767,0.4235936164855957
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,4,128,1,float16,fp8,3,0.026686400175094604
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,4,128,1,float16,float16,15,0.025231999158859254
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,4,128,1,float16,fp8,63,0.026947200298309326
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,4,128,1,float16,fp8,15,0.026849600672721862
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,4,128,1,float16,float16,31,0.025339201092720032
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,4,128,1,float16,fp8,31,0.026840001344680786
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,4,128,1,float16,float16,63,0.025319999456405638
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,4,128,1,float16,float16,127,0.025332799553871153
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,4,128,1,float16,fp8,127,0.027020800113677978
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,4,128,1,float16,float16,255,0.028951999545097352
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,4,128,1,float16,fp8,1023,0.059787201881408694
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,4,128,1,float16,fp8,255,0.030689600110054015
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,4,128,1,float16,float16,511,0.04179520010948181
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,4,128,1,float16,fp8,511,0.03907679915428162
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,4,128,1,float16,float16,1023,0.059273600578308105
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,4,128,1,float16,float16,2047,0.09327999949455261
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,4,128,1,float16,fp8,2047,0.08333280086517333
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,4,128,1,float16,float16,4095,0.15378880500793457
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,4,128,1,float16,fp8,4095,0.13510080575942993
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,4,128,1,float16,float16,8191,0.2759968042373657
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,4,128,1,float16,fp8,8191,0.2347872018814087
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,4,128,1,float16,float16,16383,0.5179279804229736
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,1,128,1,float16,float16,1,0.01642719954252243
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,4,128,1,float16,fp8,16383,0.43231678009033203
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,1,128,1,float16,fp8,1,0.017289599776268004
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,1,128,1,float16,float16,3,0.016484799981117248
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,1,128,1,float16,fp8,3,0.017214399576187134
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,1,128,1,float16,float16,7,0.016527999937534333
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,4,128,1,float16,float16,32767,1.0262784004211425
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,1,128,1,float16,fp8,7,0.01714719980955124
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,8,4,128,1,float16,fp8,32767,0.8238335609436035
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,1,128,1,float16,float16,15,0.016436800360679626
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,1,128,1,float16,fp8,15,0.01709440052509308
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,1,128,1,float16,float16,31,0.01637600064277649
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,1,128,1,float16,fp8,31,0.017188799381256104
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,1,128,1,float16,float16,63,0.01643519997596741
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,1,128,1,float16,float16,127,0.016439999639987945
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,1,128,1,float16,fp8,63,0.018054400384426118
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,1,128,1,float16,float16,255,0.017846399545669557
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,1,128,1,float16,fp8,127,0.017479999363422392
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,1,128,1,float16,fp8,255,0.01939679980278015
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,1,128,1,float16,float16,511,0.020897600054740905
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,1,128,1,float16,fp8,511,0.0234047994017601
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,1,128,1,float16,float16,1023,0.02014559954404831
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,1,128,1,float16,fp8,1023,0.02187200039625168
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,1,128,1,float16,float16,2047,0.02109439969062805
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,1,128,1,float16,fp8,2047,0.021887999773025513
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,1,128,1,float16,float16,4095,0.02181120067834854
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,1,128,1,float16,fp8,4095,0.022620800137519836
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,1,128,1,float16,float16,8191,0.022976000607013703
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,1,128,1,float16,fp8,8191,0.023835200071334838
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,1,128,1,float16,float16,16383,0.025246399641036987
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,1,128,1,float16,fp8,16383,0.025419199466705324
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,1,128,1,float16,float16,32767,0.028777599334716797
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,1,128,1,float16,fp8,32767,0.030099201202392577
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,2,128,1,float16,float16,1,0.017289599776268004
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,2,128,1,float16,fp8,1,0.017875200510025023
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,2,128,1,float16,float16,3,0.016926400363445282
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,2,128,1,float16,fp8,3,0.017759999632835387
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,2,128,1,float16,float16,7,0.016791999340057373
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,2,128,1,float16,fp8,7,0.017899200320243835
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,2,128,1,float16,float16,15,0.017027199268341064
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,2,128,1,float16,fp8,15,0.01798879951238632
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,2,128,1,float16,float16,31,0.01722719967365265
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,2,128,1,float16,fp8,31,0.017969599366188048
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,2,128,1,float16,float16,63,0.016811199486255646
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,2,128,1,float16,fp8,63,0.01788160055875778
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,2,128,1,float16,float16,127,0.017023999989032746
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,2,128,1,float16,fp8,127,0.017998400330543517
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,2,128,1,float16,float16,255,0.01900160014629364
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,2,128,1,float16,fp8,255,0.019859200716018675
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,2,128,1,float16,float16,511,0.021648000180721282
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,2,128,1,float16,fp8,511,0.023947200179100035
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,2,128,1,float16,float16,1023,0.02118239998817444
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,2,128,1,float16,fp8,1023,0.022572800517082214
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,2,128,1,float16,float16,2047,0.02150080054998398
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,2,128,1,float16,fp8,2047,0.022865599393844603
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,2,128,1,float16,float16,4095,0.02248000055551529
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,2,128,1,float16,fp8,4095,0.02330880016088486
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,2,128,1,float16,float16,8191,0.02382880002260208
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,2,128,1,float16,fp8,8191,0.02486239969730377
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,2,128,1,float16,float16,16383,0.026232001185417176
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,2,128,1,float16,fp8,16383,0.026974400877952574
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,2,128,1,float16,float16,32767,0.030537599325180055
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,4,128,1,float16,float16,7,0.01857919991016388
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,2,128,1,float16,fp8,32767,0.03156160116195679
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,4,128,1,float16,float16,1,0.018795199692249298
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,4,128,1,float16,fp8,1,0.01964319944381714
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,4,128,1,float16,float16,3,0.018716800212860107
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,4,128,1,float16,fp8,3,0.019419200718402863
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,4,128,1,float16,fp8,7,0.01950400024652481
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,4,128,1,float16,float16,15,0.018695999681949616
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,4,128,1,float16,fp8,15,0.019380800426006317
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,4,128,1,float16,float16,31,0.018873600661754607
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,4,128,1,float16,fp8,31,0.01961279958486557
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,4,128,1,float16,float16,63,0.01886879950761795
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,4,128,1,float16,fp8,63,0.01947840005159378
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,4,128,1,float16,float16,127,0.0188960000872612
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,4,128,1,float16,fp8,127,0.019644799828529357
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,4,128,1,float16,float16,255,0.02040479928255081
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,4,128,1,float16,fp8,255,0.021483199298381807
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,4,128,1,float16,float16,511,0.02346719950437546
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,4,128,1,float16,fp8,511,0.025382399559020996
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,4,128,1,float16,float16,1023,0.023127999901771546
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,4,128,1,float16,fp8,1023,0.024289600551128387
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,4,128,1,float16,float16,2047,0.023105600476264955
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,4,128,1,float16,fp8,2047,0.02463040053844452
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,4,128,1,float16,float16,4095,0.024318400025367736
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,4,128,1,float16,fp8,4095,0.025251200795173644
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,4,128,1,float16,float16,8191,0.02637600004673004
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,4,128,1,float16,fp8,8191,0.026756799221038817
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,4,128,1,float16,float16,16383,0.029993599653244017
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,4,128,1,float16,fp8,16383,0.030814400315284728
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,4,128,1,float16,float16,32767,0.041422399878501895
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,8,4,128,1,float16,fp8,32767,0.038796800374984744
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,1,128,1,float16,float16,1,0.01719360053539276
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,1,128,1,float16,fp8,1,0.018110400438308714
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,1,128,1,float16,float16,3,0.01704320013523102
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,1,128,1,float16,fp8,3,0.01791359931230545
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,1,128,1,float16,float16,7,0.017351999878883362
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,1,128,1,float16,fp8,7,0.01807200014591217
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,1,128,1,float16,float16,15,0.01733600050210953
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,1,128,1,float16,fp8,15,0.018195199966430663
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,1,128,1,float16,float16,31,0.017110399901866913
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,1,128,1,float16,fp8,31,0.018131199479103088
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,1,128,1,float16,float16,63,0.017367999255657195
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,1,128,1,float16,fp8,63,0.018119999766349794
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,1,128,1,float16,float16,127,0.017267200350761413
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,1,128,1,float16,fp8,127,0.017868800461292265
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,1,128,1,float16,float16,255,0.018854400515556334
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,1,128,1,float16,fp8,255,0.01992480009794235
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,1,128,1,float16,float16,511,0.021844799816608428
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,1,128,1,float16,fp8,511,0.024196800589561463
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,1,128,1,float16,float16,1023,0.021423999965190888
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,1,128,1,float16,fp8,1023,0.02244639992713928
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,1,128,1,float16,float16,2047,0.021646399796009064
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,1,128,1,float16,fp8,2047,0.023131200671195985
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,1,128,1,float16,float16,4095,0.022361600399017335
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,1,128,1,float16,fp8,4095,0.023585599660873414
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,1,128,1,float16,float16,8191,0.023801599442958832
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,1,128,1,float16,fp8,8191,0.025118398666381835
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,1,128,1,float16,float16,16383,0.02667199969291687
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,1,128,1,float16,fp8,16383,0.02741760015487671
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,1,128,1,float16,float16,32767,0.03065760135650635
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,1,128,1,float16,fp8,32767,0.031052801012992858
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,2,128,1,float16,float16,1,0.018881599605083465
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,2,128,1,float16,fp8,1,0.019628800451755524
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,2,128,1,float16,float16,3,0.01919520050287247
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,2,128,1,float16,fp8,3,0.01979680061340332
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,2,128,1,float16,float16,7,0.01913280040025711
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,2,128,1,float16,fp8,7,0.01974399983882904
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,2,128,1,float16,float16,15,0.01889919936656952
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,2,128,1,float16,fp8,15,0.01980320066213608
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,2,128,1,float16,float16,31,0.01889919936656952
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,2,128,1,float16,fp8,31,0.01985439956188202
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,2,128,1,float16,float16,63,0.0189968004822731
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,2,128,1,float16,fp8,63,0.019601599872112276
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,2,128,1,float16,float16,127,0.01870719939470291
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,2,128,1,float16,fp8,127,0.019886399805545806
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,2,128,1,float16,float16,255,0.020707200467586517
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,2,128,1,float16,fp8,255,0.021873599290847777
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,2,128,1,float16,float16,511,0.023622399568557738
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,2,128,1,float16,fp8,511,0.025515198707580566
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,2,128,1,float16,float16,1023,0.02305919975042343
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,2,128,1,float16,fp8,1023,0.02433599978685379
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,2,128,1,float16,float16,2047,0.023444800078868865
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,2,128,1,float16,fp8,2047,0.025086399912834168
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,2,128,1,float16,float16,4095,0.024417600035667418
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,2,128,1,float16,fp8,4095,0.025723201036453248
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,2,128,1,float16,fp8,32767,0.037462401390075686
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,2,128,1,float16,float16,8191,0.026606398820877075
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,2,128,1,float16,fp8,8191,0.02709600031375885
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,2,128,1,float16,float16,16383,0.03028160035610199
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,2,128,1,float16,fp8,16383,0.03104960024356842
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,2,128,1,float16,float16,32767,0.04304159879684448
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,4,128,1,float16,float16,1,0.01902880072593689
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,4,128,1,float16,fp8,1,0.02014880031347275
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,4,128,1,float16,float16,3,0.019139200448989868
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,4,128,1,float16,fp8,3,0.019867199659347533
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,4,128,1,float16,float16,7,0.018991999328136444
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,4,128,1,float16,fp8,7,0.02003999948501587
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,4,128,1,float16,float16,15,0.019062399864196777
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,4,128,1,float16,fp8,15,0.019808000326156615
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,4,128,1,float16,float16,31,0.019150400161743165
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,4,128,1,float16,fp8,31,0.019755199551582336
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,4,128,1,float16,float16,63,0.018961599469184874
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,4,128,1,float16,fp8,63,0.019875200092792512
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,4,128,1,float16,float16,127,0.01897439956665039
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,4,128,1,float16,fp8,127,0.019864000380039215
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,4,128,1,float16,float16,255,0.02072799950838089
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,4,128,1,float16,fp8,255,0.02202560007572174
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,4,128,1,float16,float16,511,0.023736000061035156
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,4,128,1,float16,fp8,511,0.025679999589920045
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,4,128,1,float16,float16,1023,0.023412799835205077
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,4,128,1,float16,fp8,1023,0.024564799666404725
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,4,128,1,float16,float16,2047,0.024031999707221984
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,4,128,1,float16,fp8,2047,0.02507199943065643
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,4,128,1,float16,float16,4095,0.025956800580024718
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,4,128,1,float16,fp8,4095,0.026238399744033813
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,4,128,1,float16,float16,8191,0.029440000653266907
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,4,128,1,float16,fp8,8191,0.02968960106372833
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,4,128,1,float16,float16,16383,0.04120799899101257
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,4,128,1,float16,fp8,16383,0.03613600134849548
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,4,128,1,float16,float16,32767,0.05820479989051819
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,8,4,128,1,float16,fp8,32767,0.05593119859695435
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,1,128,1,float16,float16,1,0.019892799854278564
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,1,128,1,float16,fp8,1,0.020900799334049223
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,1,128,1,float16,float16,3,0.0202224001288414
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,1,128,1,float16,fp8,3,0.02067999988794327
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,1,128,1,float16,float16,7,0.020134399831295013
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,1,128,1,float16,fp8,7,0.020772799849510193
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,1,128,1,float16,float16,15,0.02021760046482086
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,1,128,1,float16,fp8,15,0.020878399908542632
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,1,128,1,float16,float16,31,0.02033279985189438
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,1,128,1,float16,fp8,31,0.020707200467586517
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,1,128,1,float16,fp8,63,0.021115200221538545
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,1,128,1,float16,float16,63,0.020201599597930907
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,1,128,1,float16,float16,127,0.020147199928760528
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,1,128,1,float16,fp8,127,0.020876799523830415
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,1,128,1,float16,float16,255,0.022137600183486938
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,1,128,1,float16,fp8,255,0.02306240051984787
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,1,128,1,float16,float16,511,0.025171199440956117
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,1,128,1,float16,fp8,511,0.026305601000785828
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,1,128,1,float16,float16,1023,0.025865599513053894
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,1,128,1,float16,fp8,1023,0.026240000128746034
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,1,128,1,float16,float16,2047,0.02795040011405945
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,1,128,1,float16,fp8,2047,0.030195200443267824
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,1,128,1,float16,float16,4095,0.04129279851913452
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,1,128,1,float16,fp8,4095,0.03583999872207642
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,1,128,1,float16,float16,8191,0.05707039833068848
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,1,128,1,float16,fp8,8191,0.05437279939651489
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,2,128,1,float16,float16,3,0.02021760046482086
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,1,128,1,float16,float16,16383,0.09173920154571533
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,1,128,1,float16,fp8,16383,0.08018239736557006
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,1,128,1,float16,float16,32767,0.15312800407409669
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,2,128,1,float16,float16,1,0.02051839977502823
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,1,128,1,float16,fp8,32767,0.13244479894638062
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,2,128,1,float16,fp8,1,0.021371200680732727
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,2,128,1,float16,fp8,3,0.021303999423980712
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,2,128,1,float16,float16,7,0.02029760032892227
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,2,128,1,float16,fp8,7,0.02106720060110092
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,2,128,1,float16,float16,15,0.020300799608230592
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,2,128,1,float16,fp8,15,0.021300800144672394
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,2,128,1,float16,float16,31,0.020638400316238405
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,2,128,1,float16,fp8,31,0.02099359929561615
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,2,128,1,float16,float16,63,0.0205375999212265
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,2,128,1,float16,fp8,63,0.02128639966249466
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,2,128,1,float16,float16,127,0.02035679966211319
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,2,128,1,float16,fp8,127,0.02151840031147003
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,2,128,1,float16,float16,255,0.02200160026550293
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,2,128,1,float16,fp8,255,0.023422400653362273
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,2,128,1,float16,float16,511,0.025313600897789
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,2,128,1,float16,fp8,511,0.026815998554229736
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,2,128,1,float16,float16,1023,0.028167998790740965
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,2,128,1,float16,fp8,1023,0.029743999242782593
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,2,128,1,float16,float16,2047,0.041464000940322876
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,2,128,1,float16,fp8,2047,0.03610239923000336
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,2,128,1,float16,float16,4095,0.05752639770507813
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,2,128,1,float16,fp8,4095,0.055032002925872806
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,2,128,1,float16,float16,8191,0.09027199745178223
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,2,128,1,float16,fp8,8191,0.08043040037155151
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,2,128,1,float16,float16,16383,0.15218240022659302
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,2,128,1,float16,fp8,16383,0.13247519731521606
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,2,128,1,float16,float16,32767,0.2747663974761963
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,4,128,1,float16,float16,1,0.020815999805927278
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,4,128,1,float16,fp8,7,0.021512000262737273
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,2,128,1,float16,fp8,32767,0.23143839836120605
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,4,128,1,float16,fp8,1,0.021723200380802155
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,4,128,1,float16,float16,3,0.020446400344371795
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,4,128,1,float16,fp8,3,0.021451200544834136
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,4,128,1,float16,float16,7,0.02065120041370392
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,4,128,1,float16,fp8,15,0.021715199947357176
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,4,128,1,float16,float16,15,0.02077440023422241
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,4,128,1,float16,float16,31,0.02070080041885376
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,4,128,1,float16,fp8,31,0.0216511994600296
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,4,128,1,float16,float16,63,0.020822399854660036
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,4,128,1,float16,fp8,63,0.021715199947357176
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,4,128,1,float16,float16,127,0.02096319943666458
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,4,128,1,float16,fp8,127,0.021673600375652313
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,4,128,1,float16,float16,255,0.022519999742507936
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,4,128,1,float16,fp8,255,0.023843200504779817
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,4,128,1,float16,float16,511,0.02573919892311096
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,4,128,1,float16,fp8,511,0.027769601345062254
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,4,128,1,float16,float16,1023,0.039776000380516055
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,4,128,1,float16,fp8,1023,0.03544479906558991
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,4,128,1,float16,float16,2047,0.05567839741706848
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,4,128,1,float16,fp8,2047,0.05432159900665283
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,4,128,1,float16,float16,4095,0.08950399756431579
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,4,128,1,float16,fp8,4095,0.07946400046348571
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,4,128,1,float16,float16,8191,0.15011039972305298
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,4,128,1,float16,fp8,8191,0.13131040334701538
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,4,128,1,float16,float16,16383,0.27234079837799074
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,4,128,1,float16,fp8,16383,0.22807519435882567
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,1,128,1,float16,float16,1,0.018937599658966065
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,4,128,1,float16,float16,32767,0.515611219406128
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,1,128,1,float16,fp8,1,0.01968960016965866
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,1,128,1,float16,float16,3,0.019219200313091277
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,8,4,128,1,float16,fp8,32767,0.4258863925933838
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,1,128,1,float16,fp8,3,0.019812799990177155
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,1,128,1,float16,float16,7,0.019072000682353974
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,1,128,1,float16,fp8,7,0.019916799664497376
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,1,128,1,float16,float16,15,0.01879200041294098
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,1,128,1,float16,fp8,15,0.02007199972867966
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,1,128,1,float16,float16,31,0.018915200233459474
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,1,128,1,float16,fp8,31,0.01980320066213608
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,1,128,1,float16,float16,63,0.01915999948978424
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,1,128,1,float16,fp8,63,0.019815999269485473
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,1,128,1,float16,float16,127,0.019011199474334717
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,1,128,1,float16,fp8,127,0.01977279931306839
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,1,128,1,float16,float16,255,0.020500800013542174
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,1,128,1,float16,fp8,255,0.022009600698947907
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,1,128,1,float16,float16,511,0.023694400489330292
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,1,128,1,float16,fp8,511,0.02585119903087616
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,1,128,1,float16,float16,1023,0.023384000360965728
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,1,128,1,float16,fp8,1023,0.024459199607372285
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,1,128,1,float16,float16,2047,0.023601600527763368
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,1,128,1,float16,fp8,2047,0.025040000677108765
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,1,128,1,float16,float16,4095,0.024401600658893585
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,1,128,1,float16,float16,8191,0.026651200652122498
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,1,128,1,float16,fp8,4095,0.02563520073890686
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,1,128,1,float16,fp8,8191,0.027190399169921876
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,1,128,1,float16,float16,16383,0.030656000971794127
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,1,128,1,float16,fp8,16383,0.030932798981666565
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,1,128,1,float16,float16,32767,0.042654401063919066
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,1,128,1,float16,fp8,32767,0.03717280030250549
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,2,128,1,float16,float16,1,0.01892160028219223
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,2,128,1,float16,fp8,1,0.019952000677585603
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,2,128,1,float16,float16,3,0.01897120028734207
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,2,128,1,float16,fp8,3,0.019652800261974336
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,2,128,1,float16,float16,7,0.018995200097560883
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,2,128,1,float16,fp8,7,0.019875200092792512
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,2,128,1,float16,float16,15,0.019200000166893005
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,2,128,1,float16,fp8,15,0.01974720060825348
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,2,128,1,float16,float16,31,0.019068799912929535
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,2,128,1,float16,fp8,31,0.019900800287723543
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,2,128,1,float16,float16,63,0.01932000070810318
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,2,128,1,float16,fp8,63,0.02004159986972809
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,2,128,1,float16,float16,127,0.01911199986934662
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,2,128,1,float16,fp8,127,0.019835199415683746
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,2,128,1,float16,float16,255,0.02062560021877289
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,2,128,1,float16,fp8,255,0.021766400337219237
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,2,128,1,float16,float16,511,0.0237184002995491
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,2,128,1,float16,fp8,511,0.025548800826072693
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,2,128,1,float16,float16,1023,0.02330880016088486
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,2,128,1,float16,fp8,1023,0.024592000246047973
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,2,128,1,float16,float16,2047,0.024214400351047514
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,2,128,1,float16,fp8,2047,0.0248879998922348
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,2,128,1,float16,float16,4095,0.025884801149368288
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,2,128,1,float16,fp8,4095,0.026159998774528504
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,2,128,1,float16,float16,8191,0.029177600145339967
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,2,128,1,float16,fp8,8191,0.030219200253486633
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,2,128,1,float16,float16,16383,0.04103519916534424
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,2,128,1,float16,fp8,16383,0.03747360110282898
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,2,128,1,float16,float16,32767,0.05769919753074646
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,2,128,1,float16,fp8,32767,0.056454402208328244
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,4,128,1,float16,float16,1,0.01914079934358597
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,4,128,1,float16,fp8,1,0.02008959949016571
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,4,128,1,float16,float16,3,0.019198399782180787
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,4,128,1,float16,fp8,3,0.019940799474716185
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,4,128,1,float16,float16,7,0.01929119974374771
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,4,128,1,float16,fp8,7,0.02022400051355362
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,4,128,1,float16,float16,15,0.019497600197792054
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,4,128,1,float16,fp8,15,0.02008800059556961
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,4,128,1,float16,float16,31,0.019324800372123717
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,4,128,1,float16,fp8,31,0.020209600031375886
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,4,128,1,float16,float16,63,0.019233599305152893
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,4,128,1,float16,fp8,63,0.01987359970808029
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,4,128,1,float16,float16,127,0.01913599967956543
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,4,128,1,float16,fp8,127,0.020046399533748628
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,4,128,1,float16,float16,255,0.021091200411319733
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,4,128,1,float16,fp8,255,0.021825599670410156
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,4,128,1,float16,float16,511,0.02409600019454956
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,4,128,1,float16,fp8,511,0.02558239996433258
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,4,128,1,float16,float16,1023,0.023820799589157105
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,4,128,1,float16,fp8,1023,0.0247856006026268
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,4,128,1,float16,float16,2047,0.025593599677085875
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,4,128,1,float16,fp8,2047,0.025872001051902772
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,4,128,1,float16,float16,4095,0.029289600253105164
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,4,128,1,float16,fp8,4095,0.029265600442886352
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,4,128,1,float16,float16,8191,0.04053440093994141
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,4,128,1,float16,fp8,8191,0.035569599270820616
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,4,128,1,float16,float16,16383,0.05645920038223266
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,4,128,1,float16,fp8,16383,0.05366560220718384
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,4,128,1,float16,float16,32767,0.08903040289878845
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,1,128,1,float16,float16,1,0.02252800017595291
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,1,128,1,float16,fp8,1,0.02354400008916855
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,8,4,128,1,float16,fp8,32767,0.07892799973487855
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,1,128,1,float16,float16,3,0.022593599557876588
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,1,128,1,float16,fp8,3,0.023580799996852874
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,1,128,1,float16,float16,7,0.02282080054283142
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,1,128,1,float16,fp8,7,0.023630400002002717
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,1,128,1,float16,float16,15,0.02264160066843033
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,1,128,1,float16,fp8,15,0.023825600743293762
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,1,128,1,float16,float16,31,0.023076799511909486
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,1,128,1,float16,fp8,31,0.023611199855804444
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,1,128,1,float16,float16,63,0.023095999658107758
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,1,128,1,float16,fp8,63,0.023601600527763368
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,1,128,1,float16,float16,127,0.022908799350261688
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,1,128,1,float16,fp8,127,0.023654399812221526
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,1,128,1,float16,float16,255,0.02462400048971176
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,1,128,1,float16,fp8,255,0.02561599910259247
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,1,128,1,float16,float16,511,0.028011199831962586
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,1,128,1,float16,fp8,511,0.02935839891433716
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,1,128,1,float16,float16,1023,0.043059200048446655
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,1,128,1,float16,fp8,1023,0.03718560039997101
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,1,128,1,float16,float16,2047,0.05851200222969055
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,1,128,1,float16,fp8,2047,0.056883198022842404
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,1,128,1,float16,float16,4095,0.09198560118675232
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,1,128,1,float16,fp8,4095,0.08151519894599915
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,1,128,1,float16,float16,8191,0.15343680381774902
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,1,128,1,float16,fp8,8191,0.13396960496902466
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,2,128,1,float16,float16,1,0.026480001211166383
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,2,128,1,float16,fp8,1,0.028248000144958495
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,1,128,1,float16,fp8,16383,0.23209280967712403
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,1,128,1,float16,float16,16383,0.27446880340576174
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,2,128,1,float16,float16,3,0.0267551988363266
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,2,128,1,float16,fp8,3,0.02844479978084564
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,2,128,1,float16,float16,7,0.026636800169944762
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,2,128,1,float16,fp8,7,0.0281792014837265
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,2,128,1,float16,float16,15,0.026787200570106508
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,2,128,1,float16,fp8,15,0.028254398703575136
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,2,128,1,float16,float16,31,0.026612800359725953
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,2,128,1,float16,fp8,31,0.0284496009349823
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,2,128,1,float16,float16,63,0.026556798815727235
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,2,128,1,float16,fp8,63,0.028305599093437196
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,2,128,1,float16,float16,127,0.026651200652122498
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,2,128,1,float16,fp8,127,0.028519999980926514
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,2,128,1,float16,float16,255,0.029947200417518617
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,2,128,1,float16,fp8,255,0.032374399900436404
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,2,128,1,float16,float16,511,0.043582400679588316
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,2,128,1,float16,fp8,511,0.03964639902114868
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,2,128,1,float16,float16,1023,0.06188160181045532
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,2,128,1,float16,fp8,1023,0.06119840145111084
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,2,128,1,float16,float16,2047,0.09429439902305603
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,2,128,1,float16,fp8,2047,0.08515679836273193
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,2,128,1,float16,float16,4095,0.1557744026184082
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,2,128,1,float16,fp8,4095,0.13614239692687988
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,2,128,1,float16,float16,8191,0.2771231889724731
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,2,128,1,float16,fp8,16383,0.43201279640197754
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,2,128,1,float16,fp8,8191,0.23693439960479737
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,4,128,1,float16,float16,1,0.034414398670196536
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,2,128,1,float16,float16,16383,0.5299280166625977
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,4,128,1,float16,fp8,1,0.037503999471664426
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,4,128,1,float16,float16,3,0.03432640135288238
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,4,128,1,float16,fp8,3,0.037457600235939026
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,4,128,1,float16,float16,7,0.03418239951133728
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,4,128,1,float16,fp8,7,0.03765600025653839
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,4,128,1,float16,float16,15,0.034283199906349184
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,4,128,1,float16,fp8,15,0.03766080141067505
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,4,128,1,float16,float16,31,0.03447200059890747
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,4,128,1,float16,fp8,31,0.03760319948196411
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,4,128,1,float16,float16,63,0.03436320126056671
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,4,128,1,float16,fp8,63,0.03751519918441772
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,4,128,1,float16,float16,127,0.03484799861907959
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,4,128,1,float16,fp8,127,0.03752799928188324
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,4,128,1,float16,float16,255,0.04939360022544861
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,4,128,1,float16,fp8,255,0.04651040136814118
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,4,128,1,float16,float16,511,0.0627951979637146
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,4,128,1,float16,fp8,511,0.06619359850883484
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,4,128,1,float16,float16,1023,0.09833599925041199
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,4,128,1,float16,fp8,1023,0.09360160231590271
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,4,128,1,float16,float16,2047,0.1589184045791626
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,4,128,1,float16,fp8,2047,0.14225280284881592
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,4,128,1,float16,fp8,4095,0.24385440349578857
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,4,128,1,float16,float16,4095,0.27934560775756834
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,4,128,1,float16,float16,8191,0.5359951972961425
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,4,128,1,float16,fp8,8191,0.44176158905029295
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,1,128,1,float16,float16,1,0.030044800043106078
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,1,128,1,float16,fp8,1,0.03163360059261322
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,1,128,1,float16,float16,3,0.029875200986862183
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,1,128,1,float16,fp8,3,0.031563198566436766
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,4,128,1,float16,float16,16383,1.0386431694030762
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,1,128,1,float16,float16,7,0.029783999919891356
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,8,4,128,1,float16,fp8,16383,0.829248046875
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,1,128,1,float16,fp8,7,0.03134079873561859
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,1,128,1,float16,float16,15,0.029982399940490723
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,1,128,1,float16,float16,127,0.031118398904800414
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,1,128,1,float16,fp8,15,0.03144319951534271
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,1,128,1,float16,float16,31,0.030052798986434936
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,1,128,1,float16,fp8,31,0.03138880133628845
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,1,128,1,float16,float16,63,0.030711999535560607
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,1,128,1,float16,fp8,63,0.031483200192451474
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,1,128,1,float16,fp8,127,0.03162240087985992
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,1,128,1,float16,float16,255,0.033404800295829776
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,1,128,1,float16,fp8,255,0.035876798629760745
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,1,128,1,float16,float16,511,0.04782719910144806
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,1,128,1,float16,fp8,511,0.04349600076675415
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,1,128,1,float16,float16,4095,0.16032639741897584
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,1,128,1,float16,float16,1023,0.06703199744224549
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,1,128,1,float16,fp8,1023,0.0653168022632599
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,1,128,1,float16,float16,2047,0.09735519886016845
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,2,128,1,float16,float16,3,0.0373663991689682
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,1,128,1,float16,fp8,2047,0.08797600269317626
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,1,128,1,float16,fp8,4095,0.14115999937057494
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,2,128,1,float16,float16,1,0.03705599904060364
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,2,128,1,float16,fp8,1,0.040375998616218566
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,2,128,1,float16,fp8,3,0.04063520133495331
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,2,128,1,float16,float16,7,0.0373663991689682
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,2,128,1,float16,fp8,7,0.04107840061187744
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,2,128,1,float16,float16,15,0.03727200031280518
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,2,128,1,float16,fp8,15,0.040505599975585935
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,2,128,1,float16,float16,31,0.037031999230384825
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,2,128,1,float16,fp8,31,0.040479999780654904
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,2,128,1,float16,float16,63,0.03722400069236755
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,2,128,1,float16,fp8,63,0.04101920127868652
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,2,128,1,float16,float16,127,0.038443198800086974
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,2,128,1,float16,fp8,127,0.04085760116577149
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,2,128,1,float16,float16,255,0.05244960188865662
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,2,128,1,float16,fp8,1023,0.09874719977378846
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,2,128,1,float16,fp8,255,0.048430401086807254
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,2,128,1,float16,float16,511,0.06745439767837524
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,2,128,1,float16,fp8,511,0.06928319931030273
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,2,128,1,float16,float16,1023,0.10232959985733033
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,2,128,1,float16,float16,2047,0.1620736002922058
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,2,128,1,float16,fp8,2047,0.14686559438705443
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,2,128,1,float16,float16,4095,0.2858367919921875
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,4,128,1,float16,float16,1,0.05205919742584229
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,4,128,1,float16,fp8,1,0.058254402875900266
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,4,128,1,float16,fp8,7,0.058631998300552365
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,2,128,1,float16,fp8,4095,0.2458415985107422
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,4,128,1,float16,float16,31,0.052183997631072995
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,4,128,1,float16,fp8,15,0.05849760174751282
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,4,128,1,float16,float16,3,0.05210559964179993
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,4,128,1,float16,fp8,3,0.05785279870033264
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,4,128,1,float16,float16,7,0.05217599868774414
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,4,128,1,float16,float16,15,0.052076798677444455
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,4,128,1,float16,fp8,31,0.058822399377822875
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,4,128,1,float16,float16,63,0.05253919959068298
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,4,128,1,float16,fp8,63,0.05825759768486023
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,4,128,1,float16,float16,127,0.05899680256843567
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,4,128,1,float16,fp8,127,0.05898879766464234
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,4,128,1,float16,float16,255,0.07547839879989623
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,4,128,1,float16,fp8,255,0.07960000038146972
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,4,128,1,float16,float16,511,0.10610719919204711
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,4,128,1,float16,fp8,511,0.11061760187149047
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,4,128,1,float16,float16,1023,0.16970560550689698
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,4,128,1,float16,fp8,1023,0.1629855990409851
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,4,128,1,float16,float16,2047,0.2911423921585083
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,1,128,1,float16,float16,3,0.04437919855117798
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,4,128,1,float16,fp8,2047,0.25782558917999265
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,1,128,1,float16,float16,1,0.04402399957180023
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,4,128,1,float16,float16,4095,0.5418367862701416
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,1,128,1,float16,fp8,1,0.04711839854717255
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,8,4,128,1,float16,fp8,4095,0.46010241508483884
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,1,128,1,float16,fp8,3,0.04702560007572174
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,1,128,1,float16,float16,7,0.044172799587249754
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,1,128,1,float16,fp8,7,0.047056001424789426
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,1,128,1,float16,float16,15,0.04430879950523377
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,1,128,1,float16,fp8,15,0.047495999932289125
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,1,128,1,float16,fp8,31,0.04738079905509949
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,1,128,1,float16,float16,31,0.04553279876708984
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,1,128,1,float16,float16,63,0.04411360025405884
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,1,128,1,float16,fp8,63,0.047465598583221434
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,1,128,1,float16,float16,127,0.044675201177597046
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,1,128,1,float16,fp8,127,0.04730879962444305
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,1,128,1,float16,float16,255,0.060134398937225345
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,1,128,1,float16,fp8,255,0.056086397171020506
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,1,128,1,float16,float16,511,0.07565600275993348
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,1,128,1,float16,fp8,511,0.07730720043182374
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,1,128,1,float16,float16,1023,0.11191999912261963
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,1,128,1,float16,fp8,1023,0.10567519664764405
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,1,128,1,float16,float16,2047,0.17116320133209229
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,2,128,1,float16,float16,1,0.058387202024459836
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,1,128,1,float16,fp8,2047,0.15309280157089233
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,2,128,1,float16,fp8,1,0.06409599781036376
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,2,128,1,float16,float16,3,0.0587552011013031
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,2,128,1,float16,fp8,3,0.06505920290946961
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,2,128,1,float16,float16,7,0.05888320207595825
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,2,128,1,float16,fp8,7,0.06467519998550415
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,2,128,1,float16,float16,15,0.058411198854446414
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,2,128,1,float16,fp8,15,0.06499040126800537
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,2,128,1,float16,float16,31,0.05847039818763733
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,2,128,1,float16,fp8,31,0.06438239812850952
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,2,128,1,float16,float16,63,0.059515202045440675
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,2,128,1,float16,fp8,63,0.06522399783134461
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,2,128,1,float16,float16,127,0.06681920289993286
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,2,128,1,float16,fp8,127,0.06538400053977966
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,2,128,1,float16,float16,255,0.08337119817733765
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,2,128,1,float16,fp8,255,0.08649280071258544
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,2,128,1,float16,float16,511,0.11300480365753174
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,2,128,1,float16,fp8,511,0.11722240447998047
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,2,128,1,float16,float16,1023,0.1799072027206421
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,2,128,1,float16,fp8,1023,0.17206079959869386
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,2,128,1,float16,float16,2047,0.30191199779510497
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,2,128,1,float16,fp8,2047,0.26398239135742185
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,4,128,1,float16,float16,1,0.08721280097961426
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,4,128,1,float16,fp8,1,0.09976320266723633
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,4,128,1,float16,float16,3,0.08694239854812622
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,4,128,1,float16,fp8,3,0.09992160201072693
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,4,128,1,float16,float16,7,0.08743839859962463
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,4,128,1,float16,fp8,7,0.1002303957939148
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,4,128,1,float16,float16,15,0.08662880063056946
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,4,128,1,float16,fp8,15,0.10025759935379028
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,4,128,1,float16,float16,31,0.08753759860992431
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,4,128,1,float16,fp8,31,0.09977759718894959
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,4,128,1,float16,float16,63,0.09389920234680176
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,4,128,1,float16,fp8,63,0.10103039741516114
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,4,128,1,float16,float16,127,0.09720799922943116
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,4,128,1,float16,fp8,127,0.10887839794158935
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,4,128,1,float16,float16,255,0.12862080335617065
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,4,128,1,float16,fp8,255,0.13814560174942017
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,4,128,1,float16,float16,511,0.1853935956954956
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,4,128,1,float16,fp8,511,0.20010240077972413
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,4,128,1,float16,float16,1023,0.3112512111663818
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,4,128,1,float16,fp8,1023,0.30221118927001955
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,1,128,1,float16,float16,1,0.019276799261569978
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,4,128,1,float16,float16,2047,0.5580239772796631
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,1,128,1,float16,fp8,1,0.01982239931821823
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,1,128,1,float16,float16,3,0.01923999935388565
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,8,4,128,1,float16,fp8,2047,0.4858255863189697
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,1,128,1,float16,fp8,3,0.020084799826145174
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,1,128,1,float16,float16,7,0.019257600605487823
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,1,128,1,float16,fp8,7,0.020017600059509276
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,1,128,1,float16,float16,15,0.019630399346351624
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,1,128,1,float16,fp8,15,0.01997919976711273
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,1,128,1,float16,float16,31,0.019251200556755065
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,1,128,1,float16,fp8,31,0.02035360038280487
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,1,128,1,float16,float16,63,0.019300800561904908
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,1,128,1,float16,fp8,63,0.02020000070333481
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,1,128,1,float16,float16,127,0.019411200284957887
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,1,128,1,float16,fp8,127,0.020185600221157073
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,1,128,1,float16,float16,255,0.020924800634384157
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,1,128,1,float16,fp8,255,0.02191520035266876
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,1,128,1,float16,float16,511,0.023795199394226075
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,1,128,1,float16,fp8,511,0.025753599405288697
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,1,128,1,float16,float16,1023,0.023206399381160737
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,1,128,1,float16,fp8,1023,0.024929599463939668
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,1,128,1,float16,float16,2047,0.024273599684238433
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,1,128,1,float16,fp8,2047,0.025364801287651062
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,1,128,1,float16,float16,4095,0.025643199682235718
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,1,128,1,float16,fp8,4095,0.026545599102973938
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,1,128,1,float16,float16,8191,0.02958880066871643
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,1,128,1,float16,fp8,8191,0.030403199791908263
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,1,128,1,float16,float16,16383,0.042054399847984314
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,1,128,1,float16,fp8,16383,0.03652639985084534
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,1,128,1,float16,float16,32767,0.0585968017578125
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,1,128,1,float16,fp8,32767,0.05604959726333618
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,2,128,1,float16,float16,1,0.019299200177192687
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,2,128,1,float16,fp8,1,0.020252799987792967
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,2,128,1,float16,float16,3,0.0191648006439209
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,2,128,1,float16,fp8,3,0.02022400051355362
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,2,128,1,float16,float16,7,0.019200000166893005
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,2,128,1,float16,fp8,7,0.020304000377655028
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,2,128,1,float16,float16,15,0.019256000220775605
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,2,128,1,float16,fp8,15,0.020563200116157532
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,2,128,1,float16,float16,31,0.01950239986181259
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,2,128,1,float16,fp8,31,0.01993599981069565
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,2,128,1,float16,float16,63,0.019177600741386414
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,2,128,1,float16,fp8,63,0.020076799392700195
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,2,128,1,float16,float16,127,0.019508799910545348
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,2,128,1,float16,float16,1023,0.023838399350643157
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,2,128,1,float16,fp8,127,0.020295999944210052
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,2,128,1,float16,float16,255,0.02109760046005249
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,2,128,1,float16,fp8,255,0.02249760031700134
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,2,128,1,float16,float16,511,0.023788799345493317
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,2,128,1,float16,fp8,511,0.025675201416015626
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,2,128,1,float16,fp8,1023,0.025073599815368653
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,2,128,1,float16,float16,2047,0.025596800446510314
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,2,128,1,float16,fp8,2047,0.02603679895401001
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,2,128,1,float16,float16,4095,0.029287999868392943
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,2,128,1,float16,fp8,4095,0.029548799991607665
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,2,128,1,float16,fp8,32767,0.07993919849395752
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,2,128,1,float16,float16,8191,0.04066720008850098
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,2,128,1,float16,fp8,8191,0.03561600148677826
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,2,128,1,float16,float16,16383,0.05647040009498596
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,2,128,1,float16,fp8,16383,0.05405759811401367
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,2,128,1,float16,float16,32767,0.08974239826202393
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,4,128,1,float16,float16,1,0.019620800018310548
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,4,128,1,float16,fp8,1,0.020347200334072113
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,4,128,1,float16,float16,3,0.019420799612998963
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,4,128,1,float16,fp8,3,0.020336000621318816
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,4,128,1,float16,float16,7,0.019735999405384064
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,4,128,1,float16,fp8,7,0.020473599433898926
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,4,128,1,float16,float16,15,0.019390399754047393
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,4,128,1,float16,fp8,15,0.020291200280189513
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,4,128,1,float16,float16,31,0.01993599981069565
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,4,128,1,float16,fp8,31,0.020425599813461304
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,4,128,1,float16,float16,63,0.019462400674819948
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,4,128,1,float16,fp8,63,0.02054080069065094
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,4,128,1,float16,float16,127,0.019617600739002226
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,4,128,1,float16,fp8,127,0.020374399423599244
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,4,128,1,float16,float16,255,0.021217599511146545
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,4,128,1,float16,fp8,255,0.022569599747657775
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,4,128,1,float16,float16,511,0.02462559938430786
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,4,128,1,float16,fp8,511,0.026263999938964843
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,4,128,1,float16,float16,1023,0.02560960054397583
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,4,128,1,float16,fp8,1023,0.025591999292373657
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,4,128,1,float16,float16,2047,0.027260801196098326
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,4,128,1,float16,fp8,2047,0.029526400566101074
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,4,128,1,float16,float16,4095,0.039806398749351504
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,4,128,1,float16,fp8,4095,0.035776001214981076
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,4,128,1,float16,float16,8191,0.056699198484420774
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,4,128,1,float16,fp8,8191,0.0537056028842926
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,4,128,1,float16,float16,16383,0.08930720090866089
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,4,128,1,float16,fp8,16383,0.07934880256652832
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,4,128,1,float16,float16,32767,0.15067679882049562
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,8,4,128,1,float16,fp8,32767,0.13118879795074462
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,1,128,1,float16,float16,1,0.06591839790344238
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,1,128,1,float16,fp8,1,0.0713919997215271
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,1,128,1,float16,float16,3,0.06561279892921448
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,1,128,1,float16,fp8,3,0.0711408019065857
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,1,128,1,float16,float16,7,0.06609600186347961
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,1,128,1,float16,fp8,7,0.07178400158882141
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,1,128,1,float16,float16,15,0.06588799953460693
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,1,128,1,float16,fp8,15,0.07175359725952149
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,1,128,1,float16,float16,31,0.06588000059127808
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,1,128,1,float16,fp8,127,0.07217440009117126
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,1,128,1,float16,fp8,31,0.07158560156822205
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,1,128,1,float16,float16,63,0.06776480078697204
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,1,128,1,float16,fp8,63,0.07159519791603089
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,1,128,1,float16,float16,127,0.07434080243110656
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,1,128,1,float16,float16,255,0.09145920276641846
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,1,128,1,float16,fp8,255,0.09431999921798706
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,1,128,1,float16,float16,511,0.12134560346603393
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,1,128,1,float16,fp8,511,0.12532639503479004
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,1,128,1,float16,float16,1023,0.18570079803466796
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,2,128,1,float16,float16,1,0.09425600171089173
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,1,128,1,float16,fp8,1023,0.17686879634857178
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,2,128,1,float16,fp8,1,0.10629279613494873
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,2,128,1,float16,float16,3,0.09490240216255189
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,2,128,1,float16,fp8,3,0.10662720203399659
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,2,128,1,float16,float16,7,0.09421600103378296
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,2,128,1,float16,fp8,7,0.10604159832000733
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,2,128,1,float16,float16,15,0.0937279999256134
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,2,128,1,float16,fp8,15,0.10658719539642333
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,2,128,1,float16,float16,31,0.09416800141334533
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,2,128,1,float16,fp8,127,0.11598399877548218
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,2,128,1,float16,fp8,31,0.10665279626846313
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,2,128,1,float16,float16,63,0.10182559490203857
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,2,128,1,float16,fp8,63,0.1074944019317627
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,2,128,1,float16,float16,127,0.10429120063781738
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,2,128,1,float16,float16,255,0.13690880537033082
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,2,128,1,float16,fp8,255,0.1447327971458435
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,2,128,1,float16,float16,511,0.19377280473709108
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,2,128,1,float16,fp8,511,0.2066800117492676
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,2,128,1,float16,float16,1023,0.3200943946838379
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,4,128,1,float16,float16,1,0.15102399587631227
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,2,128,1,float16,fp8,1023,0.3078000068664551
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,4,128,1,float16,fp8,1,0.17525119781494142
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,4,128,1,float16,float16,3,0.14992480278015136
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,4,128,1,float16,fp8,3,0.17569119930267335
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,4,128,1,float16,float16,7,0.14996960163116455
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,4,128,1,float16,fp8,7,0.17498559951782228
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,4,128,1,float16,float16,15,0.150764799118042
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,4,128,1,float16,fp8,15,0.17556639909744262
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,4,128,1,float16,float16,31,0.15488320589065552
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,4,128,1,float16,fp8,31,0.1759600043296814
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,4,128,1,float16,float16,63,0.1580288052558899
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,4,128,1,float16,fp8,63,0.18772319555282593
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,4,128,1,float16,float16,127,0.1616528034210205
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,4,128,1,float16,fp8,127,0.18922239542007446
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,4,128,1,float16,float16,255,0.2213792085647583
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,4,128,1,float16,fp8,255,0.24550399780273438
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,4,128,1,float16,float16,1023,0.5828112125396728
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,4,128,1,float16,float16,511,0.33414719104766843
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,4,128,1,float16,fp8,511,0.3672816038131714
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,1,128,1,float16,float16,1,0.10629279613494873
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,1,128,1,float16,fp8,1,0.11756000518798829
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,1,128,1,float16,float16,3,0.10637279748916625
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,8,4,128,1,float16,fp8,1023,0.5634687900543213
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,1,128,1,float16,fp8,3,0.118286395072937
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,1,128,1,float16,float16,7,0.10565439462661744
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,1,128,1,float16,fp8,7,0.11798720359802246
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,1,128,1,float16,float16,15,0.10575360059738159
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,1,128,1,float16,fp8,15,0.11743839979171752
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,1,128,1,float16,float16,31,0.10691039562225342
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,1,128,1,float16,fp8,31,0.11802879571914673
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,1,128,1,float16,float16,63,0.1150480031967163
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,1,128,1,float16,fp8,63,0.12191840410232543
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,1,128,1,float16,float16,127,0.1180575966835022
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,2,128,1,float16,float16,1,0.1622655987739563
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,1,128,1,float16,fp8,127,0.1288815975189209
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,1,128,1,float16,float16,255,0.15020159482955933
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,1,128,1,float16,fp8,255,0.15680320262908937
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,1,128,1,float16,float16,511,0.2054975986480713
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,1,128,1,float16,fp8,511,0.2205183982849121
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,2,128,1,float16,fp8,1,0.18464640378952027
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,2,128,1,float16,float16,3,0.16125760078430176
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,2,128,1,float16,fp8,3,0.18424960374832153
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,2,128,1,float16,float16,7,0.16198879480361938
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,2,128,1,float16,fp8,7,0.18410079479217528
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,2,128,1,float16,float16,15,0.1604383945465088
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,2,128,1,float16,fp8,15,0.18448799848556519
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,2,128,1,float16,float16,31,0.16694560050964355
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,2,128,1,float16,fp8,31,0.18501919507980347
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,2,128,1,float16,float16,63,0.17178720235824585
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,2,128,1,float16,fp8,63,0.19835840463638305
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,2,128,1,float16,float16,127,0.1762719988822937
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,2,128,1,float16,fp8,127,0.20069921016693115
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,2,128,1,float16,float16,255,0.23429439067840577
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,2,128,1,float16,fp8,255,0.255945611000061
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,4,128,1,float16,fp8,1,0.3244848012924194
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,2,128,1,float16,float16,511,0.3431504011154175
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,4,128,1,float16,float16,1,0.2773504018783569
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,4,128,1,float16,float16,7,0.2776240110397339
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,2,128,1,float16,fp8,511,0.3808432102203369
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,4,128,1,float16,float16,3,0.2753040075302124
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,4,128,1,float16,fp8,3,0.3239232063293457
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,4,128,1,float16,fp8,7,0.3249311923980713
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,4,128,1,float16,float16,15,0.27830240726470945
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,4,128,1,float16,fp8,15,0.3260672092437744
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,4,128,1,float16,float16,31,0.2871232032775879
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,4,128,1,float16,fp8,31,0.3407919883728027
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,4,128,1,float16,float16,63,0.2884655952453613
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,4,128,1,float16,fp8,63,0.3470495939254761
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,4,128,1,float16,float16,127,0.29568479061126707
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,4,128,1,float16,fp8,127,0.3484816074371338
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,4,128,1,float16,float16,255,0.4089024066925049
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,4,128,1,float16,fp8,255,0.45750718116760253
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,1,128,1,float16,float16,1,0.019337600469589232
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,1,128,1,float16,fp8,1,0.020239999890327452
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,1,128,1,float16,float16,3,0.019356800615787505
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,4,128,1,float16,float16,511,0.6354351997375488
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,1,128,1,float16,fp8,3,0.02035360038280487
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,1,128,1,float16,float16,7,0.01972319930791855
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,1,128,1,float16,fp8,31,0.020499199628829956
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,8,4,128,1,float16,fp8,511,0.6943471908569336
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,1,128,1,float16,fp8,7,0.020214399695396422
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,1,128,1,float16,float16,15,0.019681599736213685
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,1,128,1,float16,fp8,15,0.020164799690246583
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,1,128,1,float16,float16,31,0.019336000084877014
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,1,128,1,float16,float16,63,0.019249600172042847
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,1,128,1,float16,fp8,63,0.020336000621318816
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,1,128,1,float16,float16,127,0.01966879963874817
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,1,128,1,float16,fp8,127,0.020075200498104094
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,1,128,1,float16,float16,255,0.021206399798393248
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,1,128,1,float16,fp8,255,0.022409600019454957
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,1,128,1,float16,float16,511,0.024063999950885772
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,1,128,1,float16,fp8,511,0.025935998558998107
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,1,128,1,float16,float16,1023,0.023707200586795808
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,1,128,1,float16,fp8,1023,0.025390401482582092
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,1,128,1,float16,float16,2047,0.02566719949245453
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,1,128,1,float16,fp8,2047,0.026043200492858888
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,1,128,1,float16,float16,4095,0.029100799560546876
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,1,128,1,float16,fp8,4095,0.029675200581550598
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,1,128,1,float16,float16,8191,0.04096960127353668
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,1,128,1,float16,fp8,8191,0.03584960103034973
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,1,128,1,float16,float16,16383,0.0572816014289856
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,1,128,1,float16,fp8,16383,0.054203200340271
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,1,128,1,float16,float16,32767,0.09038079977035522
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,1,128,1,float16,fp8,32767,0.08015040159225464
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,2,128,1,float16,float16,1,0.019729599356651306
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,2,128,1,float16,fp8,1,0.02022079974412918
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,2,128,1,float16,float16,3,0.019572800397872923
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,2,128,1,float16,fp8,3,0.020372800529003143
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,2,128,1,float16,float16,7,0.01945440024137497
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,2,128,1,float16,fp8,7,0.020585599541664123
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,2,128,1,float16,float16,15,0.020000000298023225
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,2,128,1,float16,fp8,15,0.020547200739383698
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,2,128,1,float16,float16,31,0.019617600739002226
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,2,128,1,float16,fp8,31,0.020179200172424316
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,2,128,1,float16,float16,63,0.019529600441455842
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,2,128,1,float16,fp8,63,0.020614400506019592
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,2,128,1,float16,float16,127,0.01953279972076416
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,2,128,1,float16,fp8,127,0.020556800067424774
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,2,128,1,float16,float16,255,0.021264000236988066
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,2,128,1,float16,fp8,255,0.022540800273418427
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,2,128,1,float16,float16,511,0.024323199689388276
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,2,128,1,float16,fp8,511,0.02616479992866516
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,2,128,1,float16,float16,1023,0.02529279887676239
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,2,128,1,float16,fp8,1023,0.02585279941558838
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,2,128,1,float16,float16,2047,0.027532801032066345
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,2,128,1,float16,fp8,2047,0.029627200961112977
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,2,128,1,float16,float16,4095,0.040982401371002196
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,2,128,1,float16,fp8,4095,0.035571199655532834
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,2,128,1,float16,float16,8191,0.056524801254272464
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,2,128,1,float16,fp8,8191,0.05396639704704285
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,2,128,1,float16,float16,16383,0.08949599862098694
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,2,128,1,float16,fp8,16383,0.07901440262794494
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,2,128,1,float16,float16,32767,0.14956799745559693
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,2,128,1,float16,fp8,32767,0.13262399435043334
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,4,128,1,float16,float16,1,0.019915199279785155
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,4,128,1,float16,fp8,1,0.020703999698162077
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,4,128,1,float16,float16,3,0.019923199713230134
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,4,128,1,float16,fp8,3,0.02064799964427948
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,4,128,1,float16,float16,7,0.019939200580120088
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,4,128,1,float16,fp8,7,0.020744000375270844
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,4,128,1,float16,float16,15,0.020025600492954255
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,4,128,1,float16,fp8,15,0.020878399908542632
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,4,128,1,float16,float16,31,0.020084799826145174
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,4,128,1,float16,fp8,31,0.020742399990558623
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,4,128,1,float16,float16,63,0.019860799610614776
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,4,128,1,float16,fp8,63,0.020764799416065217
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,4,128,1,float16,float16,127,0.019731199741363524
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,4,128,1,float16,fp8,127,0.020715199410915375
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,4,128,1,float16,float16,255,0.021638399362564086
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,4,128,1,float16,fp8,255,0.02259040027856827
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,4,128,1,float16,float16,511,0.024665600061416625
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,4,128,1,float16,fp8,511,0.02646239995956421
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,4,128,1,float16,float16,4095,0.05659679770469665
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,4,128,1,float16,float16,1023,0.027492800354957582
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,4,128,1,float16,fp8,1023,0.02932800054550171
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,4,128,1,float16,float16,2047,0.04085119962692261
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,4,128,1,float16,fp8,2047,0.035812801122665404
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,4,128,1,float16,fp8,4095,0.05465919971466064
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,4,128,1,float16,float16,8191,0.0894864022731781
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,4,128,1,float16,fp8,8191,0.08055199980735779
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,4,128,1,float16,float16,16383,0.15001280307769777
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,4,128,1,float16,fp8,16383,0.1316480040550232
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,4,128,1,float16,float16,32767,0.27082080841064454
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,1,128,1,float16,float16,1,0.020974400639533996
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,8,4,128,1,float16,fp8,32767,0.2310352087020874
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,1,128,1,float16,fp8,1,0.021652799844741822
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,1,128,1,float16,float16,3,0.02105119973421097
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,1,128,1,float16,fp8,3,0.021862399578094483
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,1,128,1,float16,float16,7,0.02096800059080124
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,1,128,1,float16,float16,15,0.021359999477863312
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,1,128,1,float16,fp8,7,0.021689599752426146
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,1,128,1,float16,fp8,15,0.02221280038356781
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,1,128,1,float16,float16,31,0.021063999831676485
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,1,128,1,float16,fp8,31,0.022006399929523468
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,1,128,1,float16,float16,63,0.020947200059890748
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,1,128,1,float16,fp8,63,0.02168319970369339
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,1,128,1,float16,float16,127,0.021403199434280394
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,1,128,1,float16,fp8,127,0.022060799598693847
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,1,128,1,float16,float16,255,0.022694399952888487
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,1,128,1,float16,fp8,255,0.02393440008163452
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,1,128,1,float16,float16,511,0.02611519992351532
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,1,128,1,float16,float16,4095,0.05904960036277771
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,1,128,1,float16,fp8,511,0.027721598744392395
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,1,128,1,float16,float16,1023,0.030003198981285097
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,1,128,1,float16,fp8,8191,0.08310559988021851
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,1,128,1,float16,fp8,1023,0.03195840120315552
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,1,128,1,float16,float16,2047,0.04301919937133789
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,1,128,1,float16,fp8,2047,0.038099199533462524
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,1,128,1,float16,fp8,4095,0.056531202793121335
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,1,128,1,float16,fp8,32767,0.23638720512390138
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,1,128,1,float16,float16,8191,0.0943231999874115
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,1,128,1,float16,float16,16383,0.1582751989364624
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,2,128,1,float16,fp8,1,0.022313599288463593
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,1,128,1,float16,fp8,16383,0.13794879913330077
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,2,128,1,float16,fp8,3,0.02221119999885559
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,1,128,1,float16,float16,32767,0.2828239917755127
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,2,128,1,float16,float16,1,0.021457600593566894
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,1,128,1,float16,float16,65535,0.5206592082977295
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,2,128,1,float16,float16,3,0.02125920057296753
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,1,128,1,float16,fp8,65535,0.43237600326538084
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,2,128,1,float16,fp8,31,0.022257600724697114
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,2,128,1,float16,float16,7,0.021462400257587434
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,2,128,1,float16,fp8,7,0.022303999960422517
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,2,128,1,float16,float16,15,0.021457600593566894
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,2,128,1,float16,fp8,15,0.02221439927816391
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,2,128,1,float16,float16,31,0.021641600131988525
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,2,128,1,float16,float16,63,0.021585600078105928
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,2,128,1,float16,fp8,63,0.02266719937324524
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,2,128,1,float16,float16,127,0.021726399660110474
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,2,128,1,float16,fp8,127,0.022409600019454957
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,2,128,1,float16,float16,255,0.023545600473880768
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,2,128,1,float16,fp8,255,0.024540799856185912
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,2,128,1,float16,float16,511,0.026657599210739135
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,2,128,1,float16,float16,4095,0.08975039720535279
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,2,128,1,float16,fp8,511,0.02826879918575287
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,2,128,1,float16,float16,1023,0.041843199729919435
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,2,128,1,float16,fp8,1023,0.035913598537445066
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,2,128,1,float16,float16,2047,0.05639359951019287
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,2,128,1,float16,fp8,2047,0.05456159710884094
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,2,128,1,float16,fp8,4095,0.08049119710922241
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,2,128,1,float16,float16,8191,0.15268319845199585
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,2,128,1,float16,fp8,8191,0.131113600730896
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,2,128,1,float16,float16,16383,0.27387840747833253
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,2,128,1,float16,fp8,16383,0.23061439990997315
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,2,128,1,float16,float16,32767,0.517302417755127
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,1,128,1,float16,float16,1,0.016407999396324157
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,2,128,1,float16,fp8,32767,0.4258063793182373
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,1,128,1,float16,fp8,1,0.017496000230312347
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,1,128,1,float16,float16,3,0.01653279960155487
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,1,128,1,float16,fp8,3,0.01724960058927536
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,1,128,1,float16,fp8,7,0.017372800409793852
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,2,128,1,float16,float16,65535,0.9905712127685546
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,1,128,1,float16,float16,7,0.016310399770736693
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,4,2,128,1,float16,fp8,65535,0.815113639831543
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,1,128,1,float16,float16,15,0.01615999937057495
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,1,128,1,float16,fp8,15,0.01740480065345764
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,1,128,1,float16,float16,31,0.016164800524711607
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,1,128,1,float16,fp8,31,0.01754239946603775
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,1,128,1,float16,float16,63,0.0162992000579834
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,1,128,1,float16,fp8,63,0.01805119961500168
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,1,128,1,float16,float16,127,0.016195200383663177
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,1,128,1,float16,fp8,127,0.017262400686740877
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,1,128,1,float16,float16,255,0.017892800271511078
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,1,128,1,float16,fp8,255,0.01926880031824112
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,1,128,1,float16,fp8,2047,0.022881600260734557
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,1,128,1,float16,float16,511,0.02104319930076599
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,1,128,1,float16,fp8,511,0.023257599771022798
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,1,128,1,float16,float16,1023,0.02144480049610138
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,1,128,1,float16,fp8,1023,0.022894400358200073
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,1,128,1,float16,float16,2047,0.021644799411296843
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,1,128,1,float16,float16,4095,0.021798400580883025
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,1,128,1,float16,fp8,4095,0.022908799350261688
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,1,128,1,float16,float16,8191,0.023689599335193635
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,1,128,1,float16,fp8,8191,0.024979199469089507
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,1,128,1,float16,float16,16383,0.026894399523735048
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,1,128,1,float16,fp8,16383,0.028372800350189208
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,1,128,1,float16,float16,32767,0.028751999139785767
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,1,128,1,float16,fp8,32767,0.02961759865283966
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,1,128,1,float16,float16,65535,0.0328031986951828
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,1,128,1,float16,fp8,65535,0.03377920091152191
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,2,128,1,float16,float16,1,0.017312000691890716
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,2,128,1,float16,fp8,1,0.017923200130462648
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,2,128,1,float16,fp8,3,0.017892800271511078
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,2,128,1,float16,float16,3,0.01698720008134842
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,2,128,1,float16,float16,7,0.017195199429988862
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,2,128,1,float16,fp8,7,0.01790879964828491
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,2,128,1,float16,float16,15,0.01683039963245392
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,2,128,1,float16,fp8,15,0.01818400025367737
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,2,128,1,float16,float16,31,0.0170864000916481
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,2,128,1,float16,fp8,31,0.018038399517536163
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,2,128,1,float16,float16,63,0.01703840047121048
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,2,128,1,float16,fp8,255,0.019883200526237488
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,2,128,1,float16,fp8,63,0.018049600720405578
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,2,128,1,float16,float16,127,0.017287999391555786
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,2,128,1,float16,fp8,127,0.017822399735450745
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,2,128,1,float16,float16,255,0.01847359985113144
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,2,128,1,float16,float16,511,0.021712000668048858
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,2,128,1,float16,fp8,511,0.023999999463558196
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,2,128,1,float16,float16,1023,0.022148799896240235
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,2,128,1,float16,fp8,1023,0.023520000278949738
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,2,128,1,float16,float16,2047,0.022089600563049316
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,2,128,1,float16,fp8,2047,0.023801599442958832
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,2,128,1,float16,float16,4095,0.022643199563026427
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,2,128,1,float16,float16,32767,0.029867199063301087
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,2,128,1,float16,fp8,4095,0.024241599440574645
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,2,128,1,float16,float16,8191,0.024740800261497498
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,2,128,1,float16,fp8,8191,0.025947201251983642
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,2,128,1,float16,float16,16383,0.026502400636672974
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,2,128,1,float16,fp8,16383,0.02730880081653595
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,2,128,1,float16,fp8,32767,0.030608001351356506
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,2,128,1,float16,float16,65535,0.04122720062732697
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,4,2,128,1,float16,fp8,65535,0.03861759901046753
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,1,128,1,float16,float16,1,0.017003199458122252
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,1,128,1,float16,fp8,1,0.017983999848365784
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,1,128,1,float16,float16,3,0.016991999745368958
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,1,128,1,float16,fp8,3,0.018119999766349794
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,1,128,1,float16,float16,7,0.01706240028142929
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,1,128,1,float16,fp8,7,0.01791200041770935
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,1,128,1,float16,float16,15,0.01720480024814606
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,1,128,1,float16,fp8,15,0.0180976003408432
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,1,128,1,float16,float16,31,0.01703680008649826
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,1,128,1,float16,fp8,31,0.017907199263572694
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,1,128,1,float16,fp8,255,0.020236800611019134
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,1,128,1,float16,float16,63,0.01729599982500076
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,1,128,1,float16,fp8,63,0.017929600179195405
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,1,128,1,float16,float16,127,0.017158399522304534
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,1,128,1,float16,fp8,127,0.01788640022277832
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,1,128,1,float16,float16,255,0.018760000169277192
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,1,128,1,float16,float16,511,0.021598400175571443
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,1,128,1,float16,fp8,511,0.024171200394630433
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,1,128,1,float16,float16,1023,0.022407999634742735
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,1,128,1,float16,fp8,1023,0.023873600363731384
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,1,128,1,float16,float16,2047,0.022526399791240694
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,1,128,1,float16,fp8,2047,0.0238864004611969
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,1,128,1,float16,float16,4095,0.022908799350261688
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,1,128,1,float16,fp8,4095,0.024167999625205994
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,1,128,1,float16,float16,8191,0.024753600358963013
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,1,128,1,float16,fp8,8191,0.026009601354599
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,1,128,1,float16,float16,16383,0.029223999381065367
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,1,128,1,float16,fp8,16383,0.029633599519729614
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,1,128,1,float16,float16,32767,0.03294079899787903
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,1,128,1,float16,fp8,32767,0.033103999495506284
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,1,128,1,float16,float16,65535,0.043875199556350705
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,1,128,1,float16,fp8,65535,0.039468801021575926
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,2,128,1,float16,float16,1,0.018990400433540344
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,2,128,1,float16,fp8,1,0.019739200174808503
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,2,128,1,float16,fp8,15,0.01977120041847229
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,2,128,1,float16,float16,3,0.018905599415302277
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,2,128,1,float16,fp8,3,0.01959040015935898
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,2,128,1,float16,float16,7,0.01878879964351654
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,2,128,1,float16,fp8,7,0.019627200067043306
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,2,128,1,float16,float16,15,0.01894879937171936
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,2,128,1,float16,float16,31,0.019017599523067474
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,2,128,1,float16,fp8,31,0.019760000705718993
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,2,128,1,float16,float16,63,0.018873600661754607
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,2,128,1,float16,fp8,63,0.019655999541282655
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,2,128,1,float16,float16,127,0.018831999599933626
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,2,128,1,float16,fp8,127,0.01969279944896698
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,2,128,1,float16,float16,255,0.020737600326538087
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,2,128,1,float16,fp8,255,0.022137600183486938
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,2,128,1,float16,float16,511,0.023414400219917298
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,2,128,1,float16,fp8,511,0.02588160037994385
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,2,128,1,float16,float16,1023,0.02401600033044815
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,2,128,1,float16,fp8,1023,0.025654399394989015
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,2,128,1,float16,float16,2047,0.02423200011253357
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,2,128,1,float16,fp8,2047,0.02582240104675293
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,2,128,1,float16,float16,4095,0.025
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,2,128,1,float16,fp8,4095,0.025995200872421263
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,2,128,1,float16,float16,8191,0.02789280116558075
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,2,128,1,float16,fp8,8191,0.027880001068115234
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,2,128,1,float16,float16,65535,0.06086080074310303
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,2,128,1,float16,float16,16383,0.03160319924354553
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,2,128,1,float16,fp8,16383,0.03171679973602295
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,2,128,1,float16,float16,32767,0.04318400025367737
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,1,128,1,float16,fp8,3,0.020689600706100465
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,2,128,1,float16,fp8,32767,0.03805440068244934
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,1,128,1,float16,float16,1,0.020139199495315552
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,4,2,128,1,float16,fp8,65535,0.05745279788970947
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,1,128,1,float16,fp8,1,0.021158400177955627
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,1,128,1,float16,float16,3,0.01998720020055771
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,1,128,1,float16,float16,7,0.020360000431537628
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,1,128,1,float16,fp8,7,0.02078240066766739
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,1,128,1,float16,float16,15,0.019823999702930452
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,1,128,1,float16,fp8,15,0.020919999480247496
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,1,128,1,float16,float16,31,0.020108799636363982
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,1,128,1,float16,fp8,31,0.020905600488185884
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,1,128,1,float16,float16,63,0.01987999975681305
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,1,128,1,float16,fp8,63,0.02099999934434891
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,1,128,1,float16,float16,127,0.02011999934911728
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,1,128,1,float16,fp8,127,0.020923200249671935
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,1,128,1,float16,float16,255,0.021833600103855134
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,1,128,1,float16,fp8,255,0.02295999974012375
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,1,128,1,float16,float16,511,0.024852800369262695
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,1,128,1,float16,float16,4095,0.04234879910945892
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,1,128,1,float16,fp8,511,0.026451200246810913
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,1,128,1,float16,float16,1023,0.02728480100631714
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,1,128,1,float16,fp8,1023,0.027294400334358215
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,1,128,1,float16,float16,2047,0.0288239985704422
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,1,128,1,float16,fp8,2047,0.03107360005378723
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,1,128,1,float16,fp8,4095,0.037134400010108946
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,1,128,1,float16,float16,8191,0.05940799713134766
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,1,128,1,float16,fp8,8191,0.05697759985923767
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,1,128,1,float16,float16,65535,0.28049280643463137
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,1,128,1,float16,float16,16383,0.095687997341156
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,1,128,1,float16,fp8,16383,0.08510879874229431
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,1,128,1,float16,float16,32767,0.15698879957199097
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,1,128,1,float16,fp8,32767,0.13783040046691894
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,2,128,1,float16,float16,1,0.02051360011100769
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,1,128,1,float16,fp8,65535,0.23789761066436768
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,2,128,1,float16,fp8,1,0.021246400475502015
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,2,128,1,float16,float16,3,0.02036159932613373
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,2,128,1,float16,fp8,3,0.021164800226688384
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,2,128,1,float16,float16,7,0.020294399559497835
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,2,128,1,float16,fp8,7,0.021201600134372712
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,2,128,1,float16,float16,15,0.020241600275039674
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,2,128,1,float16,fp8,15,0.02115200012922287
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,2,128,1,float16,float16,31,0.02048960030078888
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,2,128,1,float16,fp8,31,0.021404799818992615
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,2,128,1,float16,float16,63,0.020436799526214598
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,2,128,1,float16,fp8,63,0.021334399282932282
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,2,128,1,float16,float16,127,0.02029760032892227
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,2,128,1,float16,fp8,127,0.02096160054206848
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,2,128,1,float16,float16,255,0.022260800004005432
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,2,128,1,float16,fp8,255,0.023214399814605713
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,2,128,1,float16,float16,511,0.025308799743652344
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,2,128,1,float16,fp8,511,0.027102398872375488
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,2,128,1,float16,float16,1023,0.029315200448036195
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,2,128,1,float16,fp8,1023,0.03132959902286529
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,2,128,1,float16,float16,2047,0.04273279905319214
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,2,128,1,float16,fp8,2047,0.036766400933265685
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,2,128,1,float16,float16,4095,0.058601599931716916
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,2,128,1,float16,fp8,4095,0.05682399868965149
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,2,128,1,float16,float16,8191,0.09396960139274597
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,2,128,1,float16,fp8,8191,0.0833519995212555
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,2,128,1,float16,float16,16383,0.15431519746780395
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,2,128,1,float16,fp8,16383,0.1344048023223877
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,2,128,1,float16,float16,32767,0.2748512029647827
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,2,128,1,float16,fp8,65535,0.4288640022277832
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,2,128,1,float16,fp8,32767,0.2333616018295288
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,1,128,1,float16,float16,1,0.019105599820613862
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,1,128,1,float16,fp8,1,0.019620800018310548
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,4,2,128,1,float16,float16,65535,0.5319263935089111
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,1,128,1,float16,float16,3,0.0189968004822731
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,1,128,1,float16,fp8,3,0.019607999920845033
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,1,128,1,float16,float16,7,0.018731200695037843
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,1,128,1,float16,fp8,7,0.019864000380039215
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,1,128,1,float16,float16,15,0.018873600661754607
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,1,128,1,float16,fp8,15,0.01980479955673218
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,1,128,1,float16,float16,31,0.01889760047197342
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,1,128,1,float16,fp8,31,0.020051200687885285
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,1,128,1,float16,float16,63,0.018911999464035035
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,1,128,1,float16,fp8,63,0.0196943998336792
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,1,128,1,float16,float16,127,0.019011199474334717
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,1,128,1,float16,fp8,127,0.019636799395084382
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,1,128,1,float16,float16,255,0.020619200170040132
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,1,128,1,float16,fp8,255,0.021718400716781616
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,1,128,1,float16,float16,511,0.023548799753189086
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,1,128,1,float16,fp8,511,0.02582719922065735
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,1,128,1,float16,float16,1023,0.024318400025367736
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,1,128,1,float16,fp8,1023,0.025363200902938844
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,1,128,1,float16,float16,2047,0.024564799666404725
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,1,128,1,float16,fp8,2047,0.025751999020576476
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,1,128,1,float16,float16,4095,0.024668799340724946
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,1,128,1,float16,fp8,4095,0.025987198948860167
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,1,128,1,float16,float16,8191,0.027955201268196107
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,1,128,1,float16,fp8,8191,0.028171199560165405
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,1,128,1,float16,float16,16383,0.03420960009098053
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,1,128,1,float16,fp8,16383,0.03466080129146576
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,1,128,1,float16,float16,32767,0.046296000480651855
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,1,128,1,float16,fp8,32767,0.04081279933452606
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,1,128,1,float16,float16,65535,0.06407359838485718
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,1,128,1,float16,fp8,65535,0.06112639904022217
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,2,128,1,float16,float16,1,0.0189423993229866
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,2,128,1,float16,fp8,1,0.019543999433517457
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,2,128,1,float16,float16,3,0.01902559995651245
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,2,128,1,float16,fp8,3,0.019918400049209594
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,2,128,1,float16,float16,7,0.018990400433540344
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,2,128,1,float16,fp8,7,0.019823999702930452
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,2,128,1,float16,float16,15,0.019049599766731262
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,2,128,1,float16,fp8,15,0.01957920044660568
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,2,128,1,float16,float16,31,0.019126400351524353
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,2,128,1,float16,fp8,31,0.020121599733829498
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,2,128,1,float16,float16,63,0.019091199338436126
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,2,128,1,float16,fp8,63,0.019755199551582336
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,2,128,1,float16,float16,127,0.019204799830913544
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,2,128,1,float16,fp8,127,0.019923199713230134
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,2,128,1,float16,float16,255,0.020681600272655486
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,2,128,1,float16,fp8,255,0.02178560048341751
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,2,128,1,float16,float16,511,0.02374400049448013
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,2,128,1,float16,fp8,511,0.02555679976940155
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,2,128,1,float16,float16,1023,0.02417919933795929
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,2,128,1,float16,fp8,1023,0.025836798548698425
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,2,128,1,float16,float16,2047,0.024568000435829164
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,2,128,1,float16,fp8,2047,0.02584800124168396
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,2,128,1,float16,float16,4095,0.026694399118423463
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,2,128,1,float16,fp8,4095,0.02701759934425354
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,2,128,1,float16,float16,8191,0.031112000346183777
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,2,128,1,float16,fp8,8191,0.03175680041313171
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,2,128,1,float16,float16,16383,0.042752000689506534
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,2,128,1,float16,fp8,16383,0.03793599903583526
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,2,128,1,float16,float16,32767,0.060012799501419065
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,2,128,1,float16,fp8,32767,0.05797920227050781
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,2,128,1,float16,float16,65535,0.09260159730911255
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,4,2,128,1,float16,fp8,65535,0.0851967990398407
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,1,128,1,float16,float16,1,0.022720000147819518
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,1,128,1,float16,fp8,1,0.023676800727844238
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,1,128,1,float16,float16,3,0.022856000065803527
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,1,128,1,float16,fp8,3,0.023998400568962096
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,1,128,1,float16,float16,7,0.0232464000582695
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,1,128,1,float16,fp8,7,0.023531199991703035
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,1,128,1,float16,float16,15,0.022777600586414336
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,1,128,1,float16,fp8,15,0.023524799942970277
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,1,128,1,float16,float16,31,0.02295839935541153
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,1,128,1,float16,fp8,31,0.023470400273799895
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,1,128,1,float16,float16,63,0.02279680073261261
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,1,128,1,float16,fp8,63,0.02396800071001053
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,1,128,1,float16,float16,127,0.022868800163269042
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,1,128,1,float16,fp8,127,0.02380319982767105
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,1,128,1,float16,float16,255,0.02452960014343262
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,1,128,1,float16,fp8,255,0.025804799795150758
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,1,128,1,float16,float16,511,0.02771199941635132
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,1,128,1,float16,fp8,511,0.029308798909187316
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,1,128,1,float16,float16,1023,0.0433023989200592
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,1,128,1,float16,fp8,1023,0.037567999958992
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,1,128,1,float16,float16,2047,0.05856000185012818
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,1,128,1,float16,fp8,2047,0.05565119981765747
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,1,128,1,float16,float16,4095,0.0914031982421875
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,1,128,1,float16,fp8,4095,0.0810368001461029
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,1,128,1,float16,float16,8191,0.15272639989852904
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,1,128,1,float16,fp8,8191,0.13226079940795898
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,1,128,1,float16,float16,16383,0.27549920082092283
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,1,128,1,float16,fp8,16383,0.23131999969482422
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,2,128,1,float16,float16,1,0.026668798923492432
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,2,128,1,float16,fp8,1,0.02844800055027008
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,1,128,1,float16,float16,32767,0.5115488052368165
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,1,128,1,float16,fp8,32767,0.42610559463500974
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,2,128,1,float16,float16,3,0.026550400257110595
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,2,128,1,float16,fp8,3,0.027977600693702698
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,2,128,1,float16,float16,7,0.026587200164794923
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,2,128,1,float16,fp8,7,0.028288000822067262
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,2,128,1,float16,float16,15,0.026531198620796205
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,2,128,1,float16,fp8,15,0.02844800055027008
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,2,128,1,float16,float16,31,0.026731199026107787
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,2,128,1,float16,fp8,31,0.028366398811340333
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,2,128,1,float16,float16,63,0.026574400067329407
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,2,128,1,float16,fp8,63,0.0280784010887146
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,2,128,1,float16,float16,127,0.026688000559806822
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,2,128,1,float16,fp8,127,0.028420799970626832
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,2,128,1,float16,float16,255,0.02980799973011017
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,2,128,1,float16,fp8,255,0.03263199925422668
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,2,128,1,float16,float16,511,0.043609601259231565
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,2,128,1,float16,fp8,511,0.03959839940071106
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,2,128,1,float16,float16,1023,0.06172320246696472
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,2,128,1,float16,fp8,1023,0.06079040169715881
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,2,128,1,float16,float16,2047,0.09419360160827636
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,2,128,1,float16,fp8,2047,0.08443999886512757
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,2,128,1,float16,float16,4095,0.1549999952316284
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,2,128,1,float16,fp8,4095,0.1368783950805664
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,2,128,1,float16,float16,8191,0.27777440547943116
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,2,128,1,float16,fp8,8191,0.23624958992004394
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,2,128,1,float16,float16,16383,0.5284207820892334
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,1,128,1,float16,float16,1,0.029788801074028017
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,2,128,1,float16,fp8,16383,0.4316527843475342
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,1,128,1,float16,fp8,1,0.031171199679374696
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,1,128,1,float16,float16,3,0.029896000027656557
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,2,128,1,float16,float16,32767,0.9891103744506836
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,1,128,1,float16,fp8,3,0.03176159858703613
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,1,128,1,float16,float16,7,0.029761600494384765
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,4,2,128,1,float16,fp8,32767,0.8183168411254883
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,1,128,1,float16,fp8,7,0.030881598591804504
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,1,128,1,float16,float16,15,0.029793599247932435
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,1,128,1,float16,fp8,15,0.03144960105419159
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,1,128,1,float16,float16,31,0.031856000423431396
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,1,128,1,float16,fp8,31,0.03138239979743958
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,1,128,1,float16,float16,63,0.029788801074028017
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,1,128,1,float16,fp8,63,0.03170560002326965
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,1,128,1,float16,float16,127,0.02948960065841675
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,1,128,1,float16,fp8,127,0.03170560002326965
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,1,128,1,float16,float16,255,0.034564799070358275
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,1,128,1,float16,fp8,255,0.03615359961986542
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,1,128,1,float16,float16,511,0.04715999960899353
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,1,128,1,float16,fp8,511,0.042982399463653564
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,1,128,1,float16,float16,1023,0.06685919761657715
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,1,128,1,float16,fp8,1023,0.06444640159606933
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,1,128,1,float16,float16,2047,0.09750080108642578
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,1,128,1,float16,fp8,2047,0.0883184015750885
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,1,128,1,float16,float16,4095,0.15994240045547486
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,1,128,1,float16,fp8,4095,0.140665602684021
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,1,128,1,float16,float16,8191,0.28099679946899414
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,2,128,1,float16,float16,1,0.03708159923553467
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,2,128,1,float16,fp8,1,0.039980798959732056
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,1,128,1,float16,fp8,8191,0.24000320434570313
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,2,128,1,float16,float16,3,0.03728480041027069
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,2,128,1,float16,fp8,3,0.040696001052856444
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,2,128,1,float16,float16,7,0.037478399276733396
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,2,128,1,float16,fp8,7,0.04026240110397339
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,2,128,1,float16,float16,15,0.03733760118484497
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,2,128,1,float16,fp8,15,0.04015200138092041
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,2,128,1,float16,float16,31,0.03728959858417511
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,2,128,1,float16,fp8,31,0.040222400426864625
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,2,128,1,float16,float16,63,0.03758560121059418
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,2,128,1,float16,fp8,63,0.04034079909324646
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,2,128,1,float16,float16,127,0.037668800354003905
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,2,128,1,float16,fp8,127,0.040545600652694705
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,2,128,1,float16,float16,255,0.05213760137557984
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,2,128,1,float16,fp8,255,0.04855520129203796
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,2,128,1,float16,float16,511,0.06666719913482666
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,2,128,1,float16,fp8,511,0.06928640007972717
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,2,128,1,float16,float16,1023,0.10204960107803344
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,2,128,1,float16,fp8,1023,0.09779199957847595
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,2,128,1,float16,float16,2047,0.16304320096969604
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,2,128,1,float16,fp8,2047,0.14630240201950073
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,2,128,1,float16,float16,8191,0.5327087879180908
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,2,128,1,float16,float16,4095,0.28531200885772706
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,2,128,1,float16,fp8,4095,0.24582879543304442
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,1,128,1,float16,float16,1,0.04385760128498077
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,1,128,1,float16,fp8,1,0.04756639897823334
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,1,128,1,float16,float16,3,0.044519999623298646
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,1,128,1,float16,fp8,3,0.04743840098381043
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,1,128,1,float16,float16,7,0.04437279999256134
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,4,2,128,1,float16,fp8,8191,0.4434175968170166
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,1,128,1,float16,fp8,7,0.04724319875240326
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,1,128,1,float16,float16,15,0.0441536009311676
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,1,128,1,float16,fp8,15,0.04766240119934082
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,1,128,1,float16,float16,31,0.04421600103378296
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,1,128,1,float16,fp8,31,0.047712001204490664
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,1,128,1,float16,float16,63,0.044495999813079834
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,1,128,1,float16,fp8,63,0.047574400901794434
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,1,128,1,float16,float16,127,0.045105600357055665
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,1,128,1,float16,fp8,127,0.047603198885917665
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,1,128,1,float16,float16,255,0.059406399726867676
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,1,128,1,float16,fp8,255,0.0559935986995697
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,1,128,1,float16,float16,511,0.07504159808158875
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,1,128,1,float16,fp8,511,0.07621600031852722
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,1,128,1,float16,float16,1023,0.11146080493927002
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,1,128,1,float16,fp8,1023,0.10514400005340577
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,1,128,1,float16,float16,2047,0.17049599885940553
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,1,128,1,float16,fp8,2047,0.15318880081176758
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,1,128,1,float16,float16,4095,0.2946608066558838
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,2,128,1,float16,float16,1,0.05873759984970093
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,1,128,1,float16,fp8,4095,0.2554703950881958
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,2,128,1,float16,fp8,1,0.06434080004692078
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,2,128,1,float16,float16,3,0.05881919860839844
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,2,128,1,float16,fp8,3,0.06450240015983581
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,2,128,1,float16,float16,7,0.05867040157318115
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,2,128,1,float16,fp8,7,0.06423199772834778
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,2,128,1,float16,float16,15,0.058518397808074954
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,2,128,1,float16,fp8,15,0.06432960033416749
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,2,128,1,float16,float16,31,0.05863999724388123
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,2,128,1,float16,fp8,31,0.0644208014011383
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,2,128,1,float16,float16,63,0.05914880037307739
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,2,128,1,float16,fp8,63,0.06461920142173767
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,2,128,1,float16,float16,127,0.06592159867286682
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,2,128,1,float16,fp8,127,0.06490079760551452
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,2,128,1,float16,float16,255,0.0832256019115448
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,2,128,1,float16,fp8,255,0.08607680201530457
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,2,128,1,float16,float16,511,0.11320960521697998
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,2,128,1,float16,fp8,511,0.11797920465469361
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,2,128,1,float16,float16,1023,0.18032959699630738
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,2,128,1,float16,fp8,1023,0.17010719776153566
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,2,128,1,float16,float16,2047,0.2990976095199585
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,2,128,1,float16,fp8,2047,0.2636048078536987
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,1,128,1,float16,float16,1,0.019142399728298187
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,1,128,1,float16,fp8,1,0.0200655996799469
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,2,128,1,float16,float16,4095,0.5572080135345459
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,1,128,1,float16,float16,3,0.019393600523471832
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,4,2,128,1,float16,fp8,4095,0.46723041534423826
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,1,128,1,float16,fp8,3,0.02000479996204376
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,1,128,1,float16,fp8,7,0.020214399695396422
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,1,128,1,float16,float16,7,0.019259199500083923
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,1,128,1,float16,float16,15,0.019167999923229217
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,1,128,1,float16,fp8,15,0.020695999264717102
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,1,128,1,float16,float16,31,0.019356800615787505
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,1,128,1,float16,fp8,31,0.020020799338817598
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,1,128,1,float16,float16,63,0.01903519928455353
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,1,128,1,float16,fp8,63,0.020052799582481386
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,1,128,1,float16,float16,127,0.01932000070810318
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,1,128,1,float16,fp8,127,0.01984799951314926
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,1,128,1,float16,float16,255,0.02125120013952255
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,1,128,1,float16,fp8,255,0.02202560007572174
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,1,128,1,float16,float16,511,0.023919999599456787
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,1,128,1,float16,fp8,511,0.025484800338745117
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,1,128,1,float16,float16,1023,0.024592000246047973
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,1,128,1,float16,fp8,1023,0.025996801257133485
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,1,128,1,float16,float16,2047,0.024910399317741395
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,1,128,1,float16,fp8,2047,0.02613599896430969
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,1,128,1,float16,float16,4095,0.026556798815727235
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,1,128,1,float16,fp8,4095,0.026876801252365114
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,1,128,1,float16,float16,8191,0.031204798817634584
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,1,128,1,float16,fp8,8191,0.03222880065441132
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,1,128,1,float16,float16,16383,0.04571839869022369
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,1,128,1,float16,fp8,16383,0.041065600514411923
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,1,128,1,float16,float16,32767,0.0637391984462738
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,1,128,1,float16,fp8,32767,0.06056640148162842
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,1,128,1,float16,float16,65535,0.09726240038871765
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,2,128,1,float16,float16,1,0.019251200556755065
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,2,128,1,float16,fp8,1,0.02011999934911728
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,1,128,1,float16,fp8,65535,0.08816800117492676
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,2,128,1,float16,float16,3,0.01932159960269928
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,2,128,1,float16,fp8,3,0.020105600357055664
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,2,128,1,float16,float16,7,0.019555200636386872
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,2,128,1,float16,fp8,7,0.019942399859428406
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,2,128,1,float16,float16,15,0.0195375993847847
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,2,128,1,float16,fp8,15,0.02014559954404831
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,2,128,1,float16,float16,31,0.019385600090026857
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,2,128,1,float16,fp8,255,0.022006399929523468
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,2,128,1,float16,fp8,31,0.0203232005238533
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,2,128,1,float16,float16,63,0.019548800587654114
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,2,128,1,float16,fp8,63,0.020147199928760528
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,2,128,1,float16,float16,127,0.019411200284957887
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,2,128,1,float16,fp8,127,0.020051200687885285
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,2,128,1,float16,float16,255,0.02129279971122742
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,2,128,1,float16,float16,511,0.024115200340747833
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,2,128,1,float16,fp8,511,0.026009601354599
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,2,128,1,float16,float16,1023,0.024743999540805816
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,2,128,1,float16,fp8,1023,0.026041600108146667
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,2,128,1,float16,float16,2047,0.02637920081615448
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,2,128,1,float16,fp8,2047,0.026771199703216553
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,2,128,1,float16,float16,4095,0.029988801479339598
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,2,128,1,float16,fp8,4095,0.030535998940467834
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,2,128,1,float16,float16,8191,0.042752000689506534
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,2,128,1,float16,fp8,8191,0.03798879981040955
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,2,128,1,float16,float16,16383,0.05857759714126587
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,2,128,1,float16,fp8,16383,0.05577279925346375
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,2,128,1,float16,float16,32767,0.09309920072555541
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,2,128,1,float16,fp8,32767,0.08219360113143921
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,2,128,1,float16,float16,65535,0.15435359477996827
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,4,2,128,1,float16,fp8,65535,0.13490079641342162
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,1,128,1,float16,float16,1,0.06627359986305237
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,1,128,1,float16,fp8,1,0.07126079797744751
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,1,128,1,float16,float16,3,0.06532639861106873
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,1,128,1,float16,fp8,3,0.07114400267601013
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,1,128,1,float16,float16,7,0.0658079981803894
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,1,128,1,float16,fp8,7,0.0712943971157074
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,1,128,1,float16,float16,15,0.06540480256080627
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,1,128,1,float16,fp8,15,0.07137920260429383
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,1,128,1,float16,float16,31,0.06610239744186401
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,1,128,1,float16,fp8,31,0.07149119973182679
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,1,128,1,float16,float16,63,0.06636639833450317
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,1,128,1,float16,fp8,63,0.07121599912643432
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,1,128,1,float16,float16,127,0.07373759746551514
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,1,128,1,float16,fp8,127,0.07178080081939697
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,1,128,1,float16,float16,255,0.09101920127868653
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,1,128,1,float16,fp8,255,0.09433280229568482
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,1,128,1,float16,float16,511,0.12176480293273925
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,1,128,1,float16,fp8,511,0.12566720247268676
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,1,128,1,float16,float16,1023,0.18549760580062866
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,1,128,1,float16,fp8,1023,0.17596479654312133
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,1,128,1,float16,float16,2047,0.3062432050704956
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,2,128,1,float16,float16,1,0.09419839978218078
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,1,128,1,float16,fp8,2047,0.2715167999267578
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,2,128,1,float16,fp8,1,0.10615999698638916
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,2,128,1,float16,float16,3,0.09472159743309021
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,2,128,1,float16,fp8,3,0.10627199411392212
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,2,128,1,float16,float16,7,0.09416639804840088
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,2,128,1,float16,fp8,7,0.10570240020751953
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,2,128,1,float16,float16,15,0.09428640007972718
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,2,128,1,float16,fp8,15,0.10614880323410034
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,2,128,1,float16,float16,31,0.0946016013622284
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,2,128,1,float16,fp8,31,0.10632959604263306
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,2,128,1,float16,float16,63,0.10162080526351928
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,2,128,1,float16,fp8,63,0.10846240520477295
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,2,128,1,float16,float16,127,0.10476640462875367
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,2,128,1,float16,fp8,127,0.11581599712371826
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,2,128,1,float16,float16,255,0.13809759616851808
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,2,128,1,float16,fp8,255,0.14502559900283812
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,2,128,1,float16,float16,511,0.19350719451904297
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,2,128,1,float16,fp8,511,0.2087552070617676
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,2,128,1,float16,float16,1023,0.3172559976577759
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,2,128,1,float16,fp8,1023,0.30599040985107423
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,1,128,1,float16,float16,3,0.10572320222854614
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,1,128,1,float16,float16,1,0.10607839822769165
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,1,128,1,float16,fp8,1,0.11732640266418456
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,2,128,1,float16,float16,2047,0.5618527889251709
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,1,128,1,float16,float16,15,0.10625439882278442
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,4,2,128,1,float16,fp8,2047,0.49203038215637207
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,1,128,1,float16,fp8,3,0.11715519428253174
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,1,128,1,float16,float16,31,0.10738559961318969
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,1,128,1,float16,float16,7,0.10532480478286743
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,1,128,1,float16,fp8,7,0.11783519983291627
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,1,128,1,float16,fp8,15,0.11734559535980224
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,1,128,1,float16,fp8,31,0.11769759654998779
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,1,128,1,float16,float16,63,0.11615519523620606
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,1,128,1,float16,fp8,63,0.11967040300369262
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,1,128,1,float16,float16,127,0.11807039976119996
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,1,128,1,float16,fp8,127,0.12828320264816284
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,1,128,1,float16,float16,255,0.14972319602966308
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,1,128,1,float16,fp8,255,0.1567855954170227
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,2,128,1,float16,float16,1,0.16176480054855347
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,1,128,1,float16,float16,511,0.2058095932006836
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,1,128,1,float16,fp8,511,0.22094719409942626
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,1,128,1,float16,float16,1023,0.3302223920822144
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,1,128,1,float16,fp8,1023,0.3190000057220459
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,2,128,1,float16,fp8,1,0.18471360206604004
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,2,128,1,float16,float16,3,0.16110880374908448
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,2,128,1,float16,fp8,3,0.18450720310211183
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,2,128,1,float16,float16,7,0.16198719739913942
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,2,128,1,float16,fp8,7,0.18470720052719117
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,2,128,1,float16,float16,15,0.16164480447769164
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,2,128,1,float16,fp8,15,0.1847808003425598
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,2,128,1,float16,float16,31,0.16620639562606812
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,2,128,1,float16,fp8,31,0.1848960041999817
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,2,128,1,float16,float16,63,0.17011359930038453
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,2,128,1,float16,fp8,63,0.19709759950637817
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,2,128,1,float16,float16,127,0.17634400129318237
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,2,128,1,float16,fp8,127,0.20150721073150635
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,2,128,1,float16,float16,255,0.23559839725494386
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,2,128,1,float16,fp8,255,0.2555423974990845
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,2,128,1,float16,float16,511,0.3426176071166992
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,2,128,1,float16,fp8,511,0.3818703889846802
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,1,128,1,float16,float16,1,0.019457599520683287
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,1,128,1,float16,fp8,1,0.020044800639152528
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,2,128,1,float16,float16,1023,0.6037295818328857
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,1,128,1,float16,float16,3,0.019444799423217772
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,1,128,1,float16,fp8,3,0.020027199387550355
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,4,2,128,1,float16,fp8,1023,0.5730016231536865
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,1,128,1,float16,float16,7,0.01958560049533844
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,1,128,1,float16,fp8,7,0.020304000377655028
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,1,128,1,float16,float16,15,0.019519999623298645
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,1,128,1,float16,fp8,15,0.020185600221157073
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,1,128,1,float16,float16,31,0.019582399725914003
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,1,128,1,float16,fp8,31,0.020070399343967437
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,1,128,1,float16,float16,63,0.019284799695014954
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,1,128,1,float16,fp8,63,0.02024960070848465
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,1,128,1,float16,float16,127,0.01972319930791855
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,1,128,1,float16,fp8,127,0.02036159932613373
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,1,128,1,float16,float16,255,0.021201600134372712
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,1,128,1,float16,fp8,255,0.022510400414466857
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,1,128,1,float16,float16,511,0.023951999843120575
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,1,128,1,float16,fp8,511,0.025979200005531312
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,1,128,1,float16,float16,1023,0.024828800559043886
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,1,128,1,float16,fp8,1023,0.026291200518608095
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,1,128,1,float16,float16,2047,0.026915198564529418
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,1,128,1,float16,fp8,2047,0.02693760097026825
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,1,128,1,float16,float16,4095,0.03065280020236969
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,1,128,1,float16,fp8,4095,0.03075360059738159
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,1,128,1,float16,float16,8191,0.042977601289749146
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,1,128,1,float16,fp8,8191,0.03861599862575531
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,1,128,1,float16,float16,16383,0.06181600093841553
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,1,128,1,float16,fp8,16383,0.05935840010643005
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,1,128,1,float16,float16,32767,0.09610880017280579
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,1,128,1,float16,fp8,32767,0.08487679958343505
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,1,128,1,float16,float16,65535,0.15728960037231446
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,1,128,1,float16,fp8,65535,0.1378656029701233
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,2,128,1,float16,float16,1,0.0197952002286911
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,2,128,1,float16,float16,15,0.0196943998336792
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,2,128,1,float16,fp8,1,0.020664000511169435
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,2,128,1,float16,float16,3,0.019524799287319185
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,2,128,1,float16,fp8,3,0.02040800005197525
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,2,128,1,float16,float16,7,0.01971679925918579
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,2,128,1,float16,fp8,7,0.020399999618530274
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,2,128,1,float16,fp8,15,0.020436799526214598
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,2,128,1,float16,float16,31,0.019414399564266206
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,2,128,1,float16,fp8,31,0.02056480050086975
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,2,128,1,float16,float16,63,0.01969760060310364
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,2,128,1,float16,fp8,63,0.020547200739383698
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,2,128,1,float16,float16,127,0.0196943998336792
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,2,128,1,float16,fp8,127,0.0205935999751091
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,2,128,1,float16,float16,255,0.02136480063199997
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,2,128,1,float16,fp8,255,0.022465600073337554
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,2,128,1,float16,float16,511,0.024083200097084045
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,2,128,1,float16,fp8,511,0.026075199246406555
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,2,128,1,float16,float16,1023,0.02680320143699646
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,2,128,1,float16,fp8,1023,0.02662239968776703
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,2,128,1,float16,float16,2047,0.02869440019130707
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,2,128,1,float16,fp8,2047,0.03081279993057251
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,2,128,1,float16,float16,4095,0.041566398739814756
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,2,128,1,float16,fp8,4095,0.036646398901939395
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,2,128,1,float16,float16,8191,0.05937119722366333
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,2,128,1,float16,fp8,8191,0.05660960078239441
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,2,128,1,float16,float16,16383,0.09240480065345764
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,2,128,1,float16,fp8,16383,0.08181920051574706
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,2,128,1,float16,float16,32767,0.1540176033973694
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,2,128,1,float16,fp8,32767,0.13373279571533203
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,2,128,1,float16,float16,65535,0.27769761085510253
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,1,128,1,float16,float16,1,0.020956799387931824
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,4,2,128,1,float16,fp8,65535,0.23305280208587648
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,1,128,1,float16,fp8,1,0.021958400309085847
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,1,128,1,float16,float16,3,0.021063999831676485
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,1,128,1,float16,fp8,3,0.0220208004117012
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,1,128,1,float16,float16,7,0.020857599377632142
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,1,128,1,float16,fp8,7,0.02205280065536499
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,1,128,1,float16,float16,15,0.021048000454902648
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,1,128,1,float16,fp8,15,0.021639999747276307
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,1,128,1,float16,float16,31,0.021113599836826324
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,1,128,1,float16,fp8,31,0.021852800250053407
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,1,128,1,float16,float16,63,0.020819200575351714
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,1,128,1,float16,fp8,63,0.021750399470329286
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,1,128,1,float16,float16,127,0.020875200629234314
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,1,128,1,float16,fp8,127,0.021804800629615782
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,1,128,1,float16,float16,255,0.02268480062484741
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,1,128,1,float16,fp8,255,0.023636800050735474
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,1,128,1,float16,float16,511,0.0261135995388031
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,1,128,1,float16,fp8,511,0.02757599949836731
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,1,128,1,float16,float16,1023,0.0302592009305954
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,1,128,1,float16,fp8,1023,0.031856000423431396
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,1,128,1,float16,float16,2047,0.04346719980239868
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,1,128,1,float16,fp8,2047,0.03803679943084717
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,1,128,1,float16,float16,16383,0.15933120250701904
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,1,128,1,float16,float16,4095,0.059033602476119995
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,1,128,1,float16,fp8,4095,0.056561601161956784
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,1,128,1,float16,float16,8191,0.09448320269584656
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,1,128,1,float16,fp8,8191,0.08351200222969055
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,1,128,1,float16,fp8,16383,0.1398159980773926
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,1,128,1,float16,float16,32767,0.2822479963302612
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,1,128,1,float16,fp8,32767,0.23981280326843263
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,1,128,1,float16,float16,1,0.016310399770736693
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,1,128,1,float16,float16,65535,0.5180943965911865
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,1,128,1,float16,fp8,65535,0.4330304145812988
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,1,128,1,float16,fp8,1,0.01716800034046173
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,1,128,1,float16,float16,3,0.01639360040426254
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,1,128,1,float16,fp8,3,0.017192000150680543
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,1,128,1,float16,float16,7,0.016232000291347505
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,1,128,1,float16,float16,131071,0.995792007446289
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,1,128,1,float16,fp8,7,0.01703999936580658
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,64,1,2,1,128,1,float16,fp8,131071,0.8189231872558593
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,1,128,1,float16,float16,15,0.016171200573444365
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,1,128,1,float16,fp8,15,0.017324799299240114
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,1,128,1,float16,float16,31,0.016364799439907075
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,1,128,1,float16,fp8,31,0.017334400117397307
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,1,128,1,float16,float16,63,0.016096000373363496
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,1,128,1,float16,fp8,63,0.017558400332927705
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,1,128,1,float16,float16,127,0.016044799983501435
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,1,128,1,float16,float16,255,0.01799200028181076
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,1,128,1,float16,fp8,127,0.017315199971199034
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,1,128,1,float16,fp8,255,0.019150400161743165
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,1,128,1,float16,float16,511,0.020745599269866945
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,1,128,1,float16,fp8,511,0.023284800350666046
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,1,128,1,float16,float16,1023,0.021536000072956085
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,1,128,1,float16,fp8,1023,0.02324959933757782
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,1,128,1,float16,float16,2047,0.021836799383163453
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,1,128,1,float16,fp8,2047,0.023160000145435334
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,1,128,1,float16,float16,4095,0.022040000557899474
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,1,128,1,float16,fp8,4095,0.023379200696945192
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,1,128,1,float16,float16,8191,0.023638400435447692
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,1,128,1,float16,fp8,8191,0.025286400318145753
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,1,128,1,float16,float16,16383,0.027134400606155396
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,1,128,1,float16,fp8,16383,0.029068800806999206
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,1,128,1,float16,float16,32767,0.02911199927330017
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,1,128,1,float16,fp8,32767,0.029785600304603577
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,1,128,1,float16,float16,65535,0.033046400547027587
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,1,128,1,float16,fp8,65535,0.03345920145511627
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,1,128,1,float16,float16,131071,0.04476799964904785
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1,1,2,1,128,1,float16,fp8,131071,0.04103519916534424
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,1,128,1,float16,float16,1,0.01693120002746582
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,1,128,1,float16,fp8,1,0.018060800433158875
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,1,128,1,float16,float16,3,0.01720480024814606
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,1,128,1,float16,fp8,3,0.017878399789333345
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,1,128,1,float16,float16,7,0.017321600019931792
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,1,128,1,float16,fp8,7,0.018160000443458557
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,1,128,1,float16,float16,15,0.017067199945449828
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,1,128,1,float16,fp8,15,0.01823360025882721
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,1,128,1,float16,float16,31,0.016947199404239655
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,1,128,1,float16,fp8,31,0.01834239959716797
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,1,128,1,float16,float16,63,0.017212800681591034
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,1,128,1,float16,fp8,63,0.018115200102329254
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,1,128,1,float16,float16,127,0.017401599884033205
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,1,128,1,float16,fp8,127,0.017843200266361235
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,1,128,1,float16,float16,255,0.018801599740982056
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,1,128,1,float16,fp8,255,0.02016319930553436
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,1,128,1,float16,float16,511,0.02159679979085922
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,1,128,1,float16,fp8,511,0.02431199997663498
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,1,128,1,float16,float16,1023,0.022867199778556824
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,1,128,1,float16,fp8,1023,0.02419999986886978
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,1,128,1,float16,float16,2047,0.02268799990415573
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,1,128,1,float16,fp8,2047,0.024220800399780272
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,1,128,1,float16,float16,4095,0.022550399601459502
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,1,128,1,float16,fp8,4095,0.024396799504756927
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,1,128,1,float16,float16,8191,0.02508159875869751
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,1,128,1,float16,fp8,8191,0.02630240023136139
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,1,128,1,float16,float16,16383,0.03011679947376251
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,1,128,1,float16,fp8,16383,0.029953598976135254
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,1,128,1,float16,float16,32767,0.033188799023628236
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,1,128,1,float16,fp8,32767,0.0338591992855072
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,1,128,1,float16,float16,65535,0.0437855988740921
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,1,128,1,float16,fp8,65535,0.03975360095500946
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,1,128,1,float16,float16,131071,0.061159998178482056
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2,1,2,1,128,1,float16,fp8,131071,0.05900480151176453
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,1,128,1,float16,float16,1,0.019969600439071655
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,1,128,1,float16,fp8,1,0.020611199736595153
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,1,128,1,float16,float16,3,0.020019200444221497
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,1,128,1,float16,fp8,3,0.020921599864959717
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,1,128,1,float16,float16,7,0.01987680047750473
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,1,128,1,float16,fp8,7,0.020630399882793426
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,1,128,1,float16,float16,15,0.020095999538898467
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,1,128,1,float16,fp8,15,0.02128159999847412
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,1,128,1,float16,float16,31,0.019990399479866028
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,1,128,1,float16,fp8,31,0.020771199464797975
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,1,128,1,float16,float16,63,0.019985599815845488
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,1,128,1,float16,fp8,63,0.02096640020608902
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,1,128,1,float16,float16,127,0.020185600221157073
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,1,128,1,float16,fp8,127,0.02072480022907257
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,1,128,1,float16,float16,255,0.021745599806308746
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,1,128,1,float16,fp8,255,0.022724799811840057
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,1,128,1,float16,float16,511,0.024988800287246704
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,1,128,1,float16,fp8,511,0.027004799246788024
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,1,128,1,float16,float16,1023,0.02717120051383972
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,1,128,1,float16,fp8,1023,0.02738879919052124
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,1,128,1,float16,float16,2047,0.029105600714683533
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,1,128,1,float16,fp8,2047,0.031318399310112
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,1,128,1,float16,float16,4095,0.04237279891967773
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,1,128,1,float16,fp8,4095,0.03704800009727478
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,1,128,1,float16,float16,8191,0.06007999777793884
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,1,128,1,float16,fp8,8191,0.05783200263977051
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,1,128,1,float16,float16,16383,0.09721919894218445
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,1,128,1,float16,fp8,16383,0.08668479919433594
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,1,128,1,float16,float16,32767,0.15817919969558716
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,1,128,1,float16,fp8,32767,0.13954720497131348
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,1,128,1,float16,float16,65535,0.281931209564209
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,1,128,1,float16,fp8,131071,0.43205437660217283
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,1,128,1,float16,fp8,65535,0.23782880306243898
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,1,128,1,float16,float16,1,0.01892479956150055
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,1,128,1,float16,fp8,1,0.0197952002286911
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,32,1,2,1,128,1,float16,float16,131071,0.5194784164428711
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,1,128,1,float16,float16,3,0.018969599902629853
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,1,128,1,float16,fp8,3,0.01996160000562668
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,1,128,1,float16,float16,7,0.019089600443840025
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,1,128,1,float16,fp8,7,0.01992799937725067
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,1,128,1,float16,float16,15,0.018849599361419677
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,1,128,1,float16,fp8,15,0.019724799692630766
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,1,128,1,float16,float16,31,0.01911199986934662
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,1,128,1,float16,fp8,31,0.01987999975681305
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,1,128,1,float16,float16,63,0.019079999625682832
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,1,128,1,float16,fp8,63,0.019766399264335634
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,1,128,1,float16,float16,127,0.018993599712848662
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,1,128,1,float16,fp8,127,0.019763199985027312
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,1,128,1,float16,float16,255,0.020500800013542174
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,1,128,1,float16,fp8,255,0.021807999908924104
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,1,128,1,float16,float16,511,0.02362080067396164
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,1,128,1,float16,fp8,511,0.025740799307823182
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,1,128,1,float16,float16,1023,0.02455199956893921
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,1,128,1,float16,fp8,1023,0.025579199194908142
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,1,128,1,float16,float16,2047,0.02446240037679672
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,1,128,1,float16,fp8,2047,0.025923201441764833
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,1,128,1,float16,float16,4095,0.024694399535655977
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,1,128,1,float16,fp8,4095,0.026078400015830994
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,1,128,1,float16,float16,8191,0.02852480113506317
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,1,128,1,float16,fp8,8191,0.028494399785995484
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,1,128,1,float16,float16,16383,0.034980800747871396
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,1,128,1,float16,fp8,16383,0.03524320125579834
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,1,128,1,float16,float16,32767,0.04603840112686157
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,1,128,1,float16,fp8,32767,0.04141600131988525
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,1,128,1,float16,float16,65535,0.06307520270347595
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,1,128,1,float16,fp8,65535,0.060782402753829956
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,1,128,1,float16,float16,131071,0.09694880247116089
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,4,1,2,1,128,1,float16,fp8,131071,0.08897439837455749
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,1,128,1,float16,float16,1,0.023179200291633607
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,1,128,1,float16,fp8,1,0.024320000410079957
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,1,128,1,float16,float16,3,0.023160000145435334
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,1,128,1,float16,fp8,3,0.02354400008916855
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,1,128,1,float16,fp8,31,0.023691199719905853
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,1,128,1,float16,float16,7,0.023052799701690673
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,1,128,1,float16,fp8,7,0.02380480021238327
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,1,128,1,float16,float16,15,0.022843199968338012
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,1,128,1,float16,fp8,15,0.02351360023021698
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,1,128,1,float16,float16,31,0.022814400494098663
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,1,128,1,float16,float16,63,0.023227199912071228
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,1,128,1,float16,fp8,63,0.02356639951467514
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,1,128,1,float16,float16,127,0.02311680018901825
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,1,128,1,float16,fp8,127,0.024014399945735933
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,1,128,1,float16,float16,255,0.024689599871635437
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,1,128,1,float16,fp8,255,0.026001599431037904
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,1,128,1,float16,float16,511,0.028228801488876343
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,1,128,1,float16,fp8,511,0.02959040105342865
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,1,128,1,float16,float16,4095,0.09193919897079468
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,1,128,1,float16,float16,1023,0.04320319890975952
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,1,128,1,float16,fp8,1023,0.037083199620246886
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,1,128,1,float16,float16,2047,0.05936480164527893
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,1,128,1,float16,fp8,2047,0.05585439801216126
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,1,128,1,float16,fp8,4095,0.08173919916152954
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,1,128,1,float16,float16,8191,0.15250240564346312
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,1,128,1,float16,fp8,8191,0.13378560543060303
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,1,128,1,float16,float16,16383,0.2768752098083496
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,1,128,1,float16,fp8,16383,0.23204479217529297
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,1,128,1,float16,float16,32767,0.5104928016662598
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,1,128,1,float16,float16,1,0.029707199335098265
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,1,128,1,float16,fp8,32767,0.4246960163116455
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,1,128,1,float16,fp8,1,0.031150400638580322
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,1,128,1,float16,float16,3,0.02964160144329071
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,1,128,1,float16,fp8,3,0.031350401043891904
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,1,128,1,float16,float16,7,0.02954559922218323
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,1,128,1,float16,float16,65535,0.9945296287536621
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,1,128,1,float16,fp8,7,0.03115360140800476
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,128,1,2,1,128,1,float16,fp8,65535,0.8161231994628906
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,1,128,1,float16,float16,15,0.029475200176239013
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,1,128,1,float16,fp8,15,0.03139199912548065
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,1,128,1,float16,float16,31,0.029791998863220214
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,1,128,1,float16,fp8,31,0.031272000074386595
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,1,128,1,float16,float16,63,0.030011200904846193
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,1,128,1,float16,fp8,63,0.031404799222946166
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,1,128,1,float16,float16,127,0.029843199253082275
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,1,128,1,float16,fp8,127,0.03147040009498596
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,1,128,1,float16,float16,255,0.032892799377441405
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,1,128,1,float16,fp8,255,0.035195198655128476
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,1,128,1,float16,float16,511,0.046910399198532106
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,1,128,1,float16,fp8,511,0.044075199961662294
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,1,128,1,float16,float16,1023,0.06609759926795959
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,1,128,1,float16,fp8,1023,0.06457120180130005
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,1,128,1,float16,float16,2047,0.09838240146636963
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,1,128,1,float16,fp8,2047,0.08816959857940673
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,1,128,1,float16,float16,4095,0.15815999507904052
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,1,128,1,float16,float16,1,0.04417119920253754
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,1,128,1,float16,float16,16383,0.5138239860534668
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,1,128,1,float16,fp8,4095,0.13994719982147216
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,1,128,1,float16,float16,8191,0.28099679946899414
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,1,128,1,float16,fp8,8191,0.24305119514465331
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,1,128,1,float16,fp8,1,0.04691520035266876
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,1,128,1,float16,float16,3,0.04415520131587982
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,256,1,2,1,128,1,float16,fp8,16383,0.4328864097595215
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,1,128,1,float16,fp8,3,0.0470335990190506
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,1,128,1,float16,float16,7,0.044059199094772336
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,1,128,1,float16,fp8,7,0.04758720099925995
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,1,128,1,float16,float16,15,0.04434880018234253
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,1,128,1,float16,fp8,15,0.047495999932289125
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,1,128,1,float16,float16,31,0.04392159879207611
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,1,128,1,float16,fp8,31,0.04746719896793365
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,1,128,1,float16,float16,63,0.04432959854602814
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,1,128,1,float16,fp8,63,0.04721759855747223
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,1,128,1,float16,float16,127,0.04425120055675506
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,1,128,1,float16,fp8,127,0.04746879935264588
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,1,128,1,float16,float16,255,0.05978400111198425
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,1,128,1,float16,fp8,255,0.05721120238304138
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,1,128,1,float16,float16,511,0.07495200037956237
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,1,128,1,float16,fp8,511,0.07645440101623535
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,1,128,1,float16,float16,1023,0.11067359447479248
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,1,128,1,float16,fp8,1023,0.10527839660644531
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,1,128,1,float16,float16,2047,0.16948479413986206
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,1,128,1,float16,fp8,2047,0.15342719554901124
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,1,128,1,float16,float16,4095,0.2926431894302368
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,1,128,1,float16,fp8,4095,0.2570528030395508
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,1,128,1,float16,float16,1,0.018987199664115904
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,1,128,1,float16,fp8,1,0.02016959935426712
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,1,128,1,float16,float16,8191,0.5276959896087646
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,1,128,1,float16,float16,3,0.019092799723148347
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,512,1,2,1,128,1,float16,fp8,8191,0.45299358367919923
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,1,128,1,float16,float16,31,0.019200000166893005
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,1,128,1,float16,fp8,3,0.020241600275039674
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,1,128,1,float16,float16,7,0.0192671999335289
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,1,128,1,float16,fp8,7,0.02016319930553436
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,1,128,1,float16,float16,15,0.01934880018234253
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,1,128,1,float16,fp8,15,0.020136000216007234
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,1,128,1,float16,fp8,31,0.019755199551582336
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,1,128,1,float16,float16,63,0.01932000070810318
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,1,128,1,float16,fp8,63,0.020076799392700195
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,1,128,1,float16,float16,127,0.019467200338840484
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,1,128,1,float16,fp8,127,0.019883200526237488
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,1,128,1,float16,float16,255,0.02099999934434891
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,1,128,1,float16,fp8,255,0.022195200622081756
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,1,128,1,float16,float16,511,0.023958399891853333
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,1,128,1,float16,fp8,511,0.02571519911289215
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,1,128,1,float16,float16,1023,0.02492000013589859
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,1,128,1,float16,fp8,1023,0.026295998692512514
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,1,128,1,float16,float16,2047,0.02510400116443634
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,1,128,1,float16,fp8,2047,0.02617279887199402
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,1,128,1,float16,float16,4095,0.026849600672721862
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,1,128,1,float16,fp8,4095,0.026681599020957947
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,1,128,1,float16,float16,8191,0.03193280100822449
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,1,128,1,float16,fp8,8191,0.03237760066986084
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,1,128,1,float16,float16,16383,0.04659520089626312
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,1,128,1,float16,fp8,16383,0.04184480011463165
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,1,128,1,float16,float16,32767,0.06406559944152831
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,1,128,1,float16,fp8,32767,0.061326402425765994
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,1,128,1,float16,float16,65535,0.09811679720878601
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,1,128,1,float16,fp8,65535,0.08860480189323425
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,1,128,1,float16,float16,131071,0.16087039709091186
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,8,1,2,1,128,1,float16,fp8,131071,0.1422752022743225
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,1,128,1,float16,float16,1,0.06584640145301819
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,1,128,1,float16,fp8,1,0.07153279781341552
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,1,128,1,float16,float16,3,0.06530719995498657
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,1,128,1,float16,fp8,3,0.0710096001625061
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,1,128,1,float16,float16,7,0.06539999842643737
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,1,128,1,float16,fp8,7,0.07144160270690918
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,1,128,1,float16,float16,15,0.06522560119628906
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,1,128,1,float16,fp8,63,0.07140640020370484
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,1,128,1,float16,fp8,15,0.07182080149650574
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,1,128,1,float16,float16,31,0.06565600037574768
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,1,128,1,float16,fp8,31,0.07153599858283996
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,1,128,1,float16,float16,63,0.06721280217170715
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,1,128,1,float16,float16,127,0.07419360280036927
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,1,128,1,float16,fp8,127,0.07228959798812866
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,1,128,1,float16,float16,255,0.09131199717521668
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,1,128,1,float16,fp8,255,0.0944271981716156
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,1,128,1,float16,float16,511,0.12106720209121705
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,1,128,1,float16,fp8,511,0.12643200159072876
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,1,128,1,float16,float16,1023,0.18551520109176636
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,1,128,1,float16,fp8,1023,0.17697919607162477
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,1,128,1,float16,float16,2047,0.3036303997039795
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,1,128,1,float16,fp8,2047,0.271014404296875
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,1,128,1,float16,float16,1,0.10644160509109497
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,1,128,1,float16,float16,4095,0.5385503768920898
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,1,128,1,float16,fp8,1,0.11733440160751343
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,1,128,1,float16,float16,3,0.10545920133590699
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,1024,1,2,1,128,1,float16,fp8,4095,0.4691152095794678
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,1,128,1,float16,fp8,3,0.11713440418243408
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,1,128,1,float16,float16,7,0.10512000322341919
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,1,128,1,float16,fp8,7,0.11757279634475708
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,1,128,1,float16,float16,63,0.11373759508132934
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,1,128,1,float16,float16,15,0.10623199939727783
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,1,128,1,float16,fp8,15,0.11756160259246826
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,1,128,1,float16,float16,255,0.1480831980705261
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,1,128,1,float16,float16,31,0.10597920417785645
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,1,128,1,float16,fp8,31,0.11754399538040161
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,1,128,1,float16,fp8,63,0.11758400201797485
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,1,128,1,float16,float16,127,0.11750719547271729
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,1,128,1,float16,fp8,127,0.12796000242233277
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,1,128,1,float16,fp8,255,0.15803680419921876
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,1,128,1,float16,float16,511,0.2055504083633423
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,1,128,1,float16,fp8,1,0.020508800446987153
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,1,128,1,float16,fp8,511,0.22085120677947997
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,1,128,1,float16,float16,1023,0.32900800704956057
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,1,128,1,float16,fp8,1023,0.3213007926940918
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,1,128,1,float16,float16,1,0.019470399618148802
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,1,128,1,float16,float16,2047,0.5610671997070312
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,1,128,1,float16,float16,3,0.019513599574565887
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,2048,1,2,1,128,1,float16,fp8,2047,0.506328010559082
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,1,128,1,float16,fp8,3,0.020180800557136537
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,1,128,1,float16,float16,7,0.01937279999256134
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,1,128,1,float16,fp8,7,0.020078399777412416
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,1,128,1,float16,fp8,15,0.02019519954919815
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,1,128,1,float16,float16,15,0.019593599438667297
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,1,128,1,float16,float16,31,0.019356800615787505
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,1,128,1,float16,fp8,31,0.02028000056743622
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,1,128,1,float16,float16,63,0.019324800372123717
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,1,128,1,float16,fp8,63,0.020295999944210052
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,1,128,1,float16,float16,127,0.019244800508022308
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,1,128,1,float16,fp8,127,0.02027679979801178
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,1,128,1,float16,float16,255,0.021033599972724915
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,1,128,1,float16,fp8,255,0.0224031999707222
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,1,128,1,float16,float16,511,0.024107199907302857
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,1,128,1,float16,fp8,511,0.025703999400138854
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,1,128,1,float16,float16,1023,0.024864000082015992
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,1,128,1,float16,fp8,1023,0.026552000641822816
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,1,128,1,float16,float16,2047,0.026627200841903686
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,1,128,1,float16,fp8,2047,0.026924800872802735
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,1,128,1,float16,float16,4095,0.03028160035610199
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,1,128,1,float16,fp8,4095,0.030638399720191955
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,1,128,1,float16,float16,8191,0.04315040111541748
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,1,128,1,float16,fp8,8191,0.038395199179649356
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,1,128,1,float16,float16,16383,0.06311519742012024
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,1,128,1,float16,float16,32767,0.09651359915733337
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,1,128,1,float16,fp8,16383,0.06019359827041626
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,1,128,1,float16,fp8,32767,0.0859824001789093
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,1,128,1,float16,float16,65535,0.15865119695663452
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,1,128,1,float16,fp8,65535,0.1383280038833618
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,1,128,1,float16,float16,131071,0.28175039291381837
SGLang,0.5.6.post2,NVIDIA H200,generation_attention,flash_attention,16,1,2,1,128,1,float16,fp8,131071,0.2383647918701172
